10 5 6 1 4 6 1 1 17 16 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Misc and compatibility things * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/export.h> #include <linux/moduleparam.h> #include <linux/time.h> #include <linux/slab.h> #include <linux/ioport.h> #include <linux/fs.h> #include <sound/core.h> #ifdef CONFIG_SND_DEBUG #ifdef CONFIG_SND_DEBUG_VERBOSE #define DEFAULT_DEBUG_LEVEL 2 #else #define DEFAULT_DEBUG_LEVEL 1 #endif static int debug = DEFAULT_DEBUG_LEVEL; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Debug level (0 = disable)"); #endif /* CONFIG_SND_DEBUG */ void release_and_free_resource(struct resource *res) { if (res) { release_resource(res); kfree(res); } } EXPORT_SYMBOL(release_and_free_resource); #ifdef CONFIG_SND_VERBOSE_PRINTK /* strip the leading path if the given path is absolute */ static const char *sanity_file_name(const char *path) { if (*path == '/') return strrchr(path, '/') + 1; else return path; } #endif #if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) void __snd_printk(unsigned int level, const char *path, int line, const char *format, ...) { va_list args; #ifdef CONFIG_SND_VERBOSE_PRINTK int kern_level; struct va_format vaf; char verbose_fmt[] = KERN_DEFAULT "ALSA %s:%d %pV"; bool level_found = false; #endif #ifdef CONFIG_SND_DEBUG if (debug < level) return; #endif va_start(args, format); #ifdef CONFIG_SND_VERBOSE_PRINTK vaf.fmt = format; vaf.va = &args; while ((kern_level = printk_get_level(vaf.fmt)) != 0) { const char *end_of_header = printk_skip_level(vaf.fmt); /* Ignore KERN_CONT. We print filename:line for each piece. */ if (kern_level >= '0' && kern_level <= '7') { memcpy(verbose_fmt, vaf.fmt, end_of_header - vaf.fmt); level_found = true; } vaf.fmt = end_of_header; } if (!level_found && level) memcpy(verbose_fmt, KERN_DEBUG, sizeof(KERN_DEBUG) - 1); printk(verbose_fmt, sanity_file_name(path), line, &vaf); #else vprintk(format, args); #endif va_end(args); } EXPORT_SYMBOL_GPL(__snd_printk); #endif #ifdef CONFIG_PCI #include <linux/pci.h> /** * snd_pci_quirk_lookup_id - look up a PCI SSID quirk list * @vendor: PCI SSV id * @device: PCI SSD id * @list: quirk list, terminated by a null entry * * Look through the given quirk list and finds a matching entry * with the same PCI SSID. When subdevice is 0, all subdevice * values may match. * * Returns the matched entry pointer, or NULL if nothing matched. */ const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list) { const struct snd_pci_quirk *q; for (q = list; q->subvendor || q->subdevice; q++) { if (q->subvendor != vendor) continue; if (!q->subdevice || (device & q->subdevice_mask) == q->subdevice) return q; } return NULL; } EXPORT_SYMBOL(snd_pci_quirk_lookup_id); /** * snd_pci_quirk_lookup - look up a PCI SSID quirk list * @pci: pci_dev handle * @list: quirk list, terminated by a null entry * * Look through the given quirk list and finds a matching entry * with the same PCI SSID. When subdevice is 0, all subdevice * values may match. * * Returns the matched entry pointer, or NULL if nothing matched. */ const struct snd_pci_quirk * snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list) { if (!pci) return NULL; return snd_pci_quirk_lookup_id(pci->subsystem_vendor, pci->subsystem_device, list); } EXPORT_SYMBOL(snd_pci_quirk_lookup); #endif /* * Deferred async signal helpers * * Below are a few helper functions to wrap the async signal handling * in the deferred work. The main purpose is to avoid the messy deadlock * around tasklist_lock and co at the kill_fasync() invocation. * fasync_helper() and kill_fasync() are replaced with snd_fasync_helper() * and snd_kill_fasync(), respectively. In addition, snd_fasync_free() has * to be called at releasing the relevant file object. */ struct snd_fasync { struct fasync_struct *fasync; int signal; int poll; int on; struct list_head list; }; static DEFINE_SPINLOCK(snd_fasync_lock); static LIST_HEAD(snd_fasync_list); static void snd_fasync_work_fn(struct work_struct *work) { struct snd_fasync *fasync; spin_lock_irq(&snd_fasync_lock); while (!list_empty(&snd_fasync_list)) { fasync = list_first_entry(&snd_fasync_list, struct snd_fasync, list); list_del_init(&fasync->list); spin_unlock_irq(&snd_fasync_lock); if (fasync->on) kill_fasync(&fasync->fasync, fasync->signal, fasync->poll); spin_lock_irq(&snd_fasync_lock); } spin_unlock_irq(&snd_fasync_lock); } static DECLARE_WORK(snd_fasync_work, snd_fasync_work_fn); int snd_fasync_helper(int fd, struct file *file, int on, struct snd_fasync **fasyncp) { struct snd_fasync *fasync = NULL; if (on) { fasync = kzalloc(sizeof(*fasync), GFP_KERNEL); if (!fasync) return -ENOMEM; INIT_LIST_HEAD(&fasync->list); } spin_lock_irq(&snd_fasync_lock); if (*fasyncp) { kfree(fasync); fasync = *fasyncp; } else { if (!fasync) { spin_unlock_irq(&snd_fasync_lock); return 0; } *fasyncp = fasync; } fasync->on = on; spin_unlock_irq(&snd_fasync_lock); return fasync_helper(fd, file, on, &fasync->fasync); } EXPORT_SYMBOL_GPL(snd_fasync_helper); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) { unsigned long flags; if (!fasync || !fasync->on) return; spin_lock_irqsave(&snd_fasync_lock, flags); fasync->signal = signal; fasync->poll = poll; list_move(&fasync->list, &snd_fasync_list); schedule_work(&snd_fasync_work); spin_unlock_irqrestore(&snd_fasync_lock, flags); } EXPORT_SYMBOL_GPL(snd_kill_fasync); void snd_fasync_free(struct snd_fasync *fasync) { if (!fasync) return; fasync->on = 0; flush_work(&snd_fasync_work); kfree(fasync); } EXPORT_SYMBOL_GPL(snd_fasync_free); |
9 72 72 160 160 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | // SPDX-License-Identifier: GPL-2.0-only /* * Support KVM gust page tracking * * This feature allows us to track page access in guest. Currently, only * write access is tracked. * * Copyright(C) 2015 Intel Corporation. * * Author: * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/lockdep.h> #include <linux/kvm_host.h> #include <linux/rculist.h> #include "mmu.h" #include "mmu_internal.h" #include "page_track.h" bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) { return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) || !tdp_enabled || kvm_shadow_root_allocated(kvm); } void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) { kvfree(slot->arch.gfn_write_track); slot->arch.gfn_write_track = NULL; } static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, unsigned long npages) { const size_t size = sizeof(*slot->arch.gfn_write_track); if (!slot->arch.gfn_write_track) slot->arch.gfn_write_track = __vcalloc(npages, size, GFP_KERNEL_ACCOUNT); return slot->arch.gfn_write_track ? 0 : -ENOMEM; } int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { if (!kvm_page_track_write_tracking_enabled(kvm)) return 0; return __kvm_page_track_write_tracking_alloc(slot, npages); } int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) { return __kvm_page_track_write_tracking_alloc(slot, slot->npages); } static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, short count) { int index, val; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); val = slot->arch.gfn_write_track[index]; if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) return; slot->arch.gfn_write_track[index] += count; } void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) { lockdep_assert_held_write(&kvm->mmu_lock); lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || srcu_read_lock_held(&kvm->srcu)); if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; update_gfn_write_track(slot, gfn, 1); /* * new track stops large page mapping for the * tracked page. */ kvm_mmu_gfn_disallow_lpage(slot, gfn); if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) kvm_flush_remote_tlbs(kvm); } void __kvm_write_track_remove_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) { lockdep_assert_held_write(&kvm->mmu_lock); lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || srcu_read_lock_held(&kvm->srcu)); if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; update_gfn_write_track(slot, gfn, -1); /* * allow large page mapping for the tracked page * after the tracker is gone. */ kvm_mmu_gfn_allow_lpage(slot, gfn); } /* * check if the corresponding access on the specified guest page is tracked. */ bool kvm_gfn_is_write_tracked(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn) { int index; if (!slot) return false; if (!kvm_page_track_write_tracking_enabled(kvm)) return false; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); return !!READ_ONCE(slot->arch.gfn_write_track[index]); } #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING void kvm_page_track_cleanup(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; head = &kvm->arch.track_notifier_head; cleanup_srcu_struct(&head->track_srcu); } int kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; head = &kvm->arch.track_notifier_head; INIT_HLIST_HEAD(&head->track_notifier_list); return init_srcu_struct(&head->track_srcu); } /* * register the notifier so that event interception for the tracked guest * pages can be received. */ int kvm_page_track_register_notifier(struct kvm *kvm, struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; if (!kvm || kvm->mm != current->mm) return -ESRCH; kvm_get_kvm(kvm); head = &kvm->arch.track_notifier_head; write_lock(&kvm->mmu_lock); hlist_add_head_rcu(&n->node, &head->track_notifier_list); write_unlock(&kvm->mmu_lock); return 0; } EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); /* * stop receiving the event interception. It is the opposed operation of * kvm_page_track_register_notifier(). */ void kvm_page_track_unregister_notifier(struct kvm *kvm, struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; head = &kvm->arch.track_notifier_head; write_lock(&kvm->mmu_lock); hlist_del_rcu(&n->node); write_unlock(&kvm->mmu_lock); synchronize_srcu(&head->track_srcu); kvm_put_kvm(kvm); } EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); /* * Notify the node that write access is intercepted and write emulation is * finished at this time. * * The node should figure out if the written page is the one that node is * interested in by itself. */ void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; int idx; head = &kvm->arch.track_notifier_head; if (hlist_empty(&head->track_notifier_list)) return; idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, srcu_read_lock_held(&head->track_srcu)) if (n->track_write) n->track_write(gpa, new, bytes, n); srcu_read_unlock(&head->track_srcu, idx); } /* * Notify external page track nodes that a memory region is being removed from * the VM, e.g. so that users can free any associated metadata. */ void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; int idx; head = &kvm->arch.track_notifier_head; if (hlist_empty(&head->track_notifier_list)) return; idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, srcu_read_lock_held(&head->track_srcu)) if (n->track_remove_region) n->track_remove_region(slot->base_gfn, slot->npages, n); srcu_read_unlock(&head->track_srcu, idx); } /* * add guest page to the tracking pool so that corresponding access on that * page will be intercepted. * * @kvm: the guest instance we are interested in. * @gfn: the guest page. */ int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; int idx; idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); if (!slot) { srcu_read_unlock(&kvm->srcu, idx); return -EINVAL; } write_lock(&kvm->mmu_lock); __kvm_write_track_add_gfn(kvm, slot, gfn); write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); return 0; } EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); /* * remove the guest page from the tracking pool which stops the interception * of corresponding access on that page. * * @kvm: the guest instance we are interested in. * @gfn: the guest page. */ int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; int idx; idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); if (!slot) { srcu_read_unlock(&kvm->srcu, idx); return -EINVAL; } write_lock(&kvm->mmu_lock); __kvm_write_track_remove_gfn(kvm, slot, gfn); write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); return 0; } EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); #endif |
7964 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ipi #if !defined(_TRACE_IPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IPI_H #include <linux/tracepoint.h> /** * ipi_raise - called when a smp cross call is made * * @mask: mask of recipient CPUs for the IPI * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string. */ TRACE_EVENT(ipi_raise, TP_PROTO(const struct cpumask *mask, const char *reason), TP_ARGS(mask, reason), TP_STRUCT__entry( __bitmask(target_cpus, nr_cpumask_bits) __field(const char *, reason) ), TP_fast_assign( __assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits); __entry->reason = reason; ), TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) ); TRACE_EVENT(ipi_send_cpu, TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), TP_ARGS(cpu, callsite, callback), TP_STRUCT__entry( __field(unsigned int, cpu) __field(void *, callsite) __field(void *, callback) ), TP_fast_assign( __entry->cpu = cpu; __entry->callsite = (void *)callsite; __entry->callback = callback; ), TP_printk("cpu=%u callsite=%pS callback=%pS", __entry->cpu, __entry->callsite, __entry->callback) ); TRACE_EVENT(ipi_send_cpumask, TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), TP_ARGS(cpumask, callsite, callback), TP_STRUCT__entry( __cpumask(cpumask) __field(void *, callsite) __field(void *, callback) ), TP_fast_assign( __assign_cpumask(cpumask, cpumask_bits(cpumask)); __entry->callsite = (void *)callsite; __entry->callback = callback; ), TP_printk("cpumask=%s callsite=%pS callback=%pS", __get_cpumask(cpumask), __entry->callsite, __entry->callback) ); DECLARE_EVENT_CLASS(ipi_handler, TP_PROTO(const char *reason), TP_ARGS(reason), TP_STRUCT__entry( __field(const char *, reason) ), TP_fast_assign( __entry->reason = reason; ), TP_printk("(%s)", __entry->reason) ); /** * ipi_entry - called immediately before the IPI handler * * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string, ideally the same as used with trace_ipi_raise * for that IPI. */ DEFINE_EVENT(ipi_handler, ipi_entry, TP_PROTO(const char *reason), TP_ARGS(reason) ); /** * ipi_exit - called immediately after the IPI handler returns * * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string, ideally the same as used with trace_ipi_raise for * that IPI. */ DEFINE_EVENT(ipi_handler, ipi_exit, TP_PROTO(const char *reason), TP_ARGS(reason) ); #endif /* _TRACE_IPI_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
3672 3673 929 929 149 149 81 81 1073 328 1074 1073 328 1074 1074 328 328 427 59 381 48 48 3 2 251 55 25 249 250 25 25 6 6 6 44 44 44 5303 5297 5303 5299 29 24 6499 6502 71 71 1359 1359 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 | // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/gfp.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> #include <asm/mtrr.h> #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif #ifdef CONFIG_HIGHPTE #define PGTABLE_HIGHMEM __GFP_HIGHMEM #else #define PGTABLE_HIGHMEM 0 #endif #ifndef CONFIG_PARAVIRT static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { tlb_remove_page(tlb, table); } #endif gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) { return __pte_alloc_one(mm, __userpte_alloc_gfp); } static int __init setup_userpte(char *arg) { if (!arg) return -EINVAL; /* * "userpte=nohigh" disables allocation of user pagetables in * high memory. */ if (strcmp(arg, "nohigh") == 0) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; else return -EINVAL; return 0; } early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { pagetable_pte_dtor(page_ptdesc(pte)); paravirt_release_pte(page_to_pfn(pte)); paravirt_tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { struct ptdesc *ptdesc = virt_to_ptdesc(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table * entries need a full cr3 reload to flush. */ #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif pagetable_pmd_dtor(ptdesc); paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { struct ptdesc *ptdesc = virt_to_ptdesc(pud); pagetable_pud_dtor(ptdesc); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_page(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_page(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* CONFIG_PGTABLE_LEVELS > 2 */ static inline void pgd_list_add(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_add(&ptdesc->pt_list, &pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_del(&ptdesc->pt_list); } #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) #define MAX_UNSHARED_PTRS_PER_PGD \ max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { virt_to_ptdesc(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { return page_ptdesc(page)->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) { /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ if (CONFIG_PGTABLE_LEVELS == 2 || (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || CONFIG_PGTABLE_LEVELS >= 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); } /* list required to sync kernel mapping updates */ if (!SHARED_KERNEL_PMD) { pgd_set_mm(pgd, mm); pgd_list_add(pgd); } } static void pgd_dtor(pgd_t *pgd) { if (SHARED_KERNEL_PMD) return; spin_lock(&pgd_lock); pgd_list_del(pgd); spin_unlock(&pgd_lock); } /* * List of all pgd's needed for non-PAE so it can invalidate entries * in both cached and uncached pgd's; not needed for PAE since the * kernel pmd is shared. If PAE were not to share the pmd a similar * tactic would be needed. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. * -- nyc */ #ifdef CONFIG_X86_PAE /* * In PAE mode, we need to do a cr3 reload (=tlb flush) when * updating the top-level pagetable entries to guarantee the * processor notices the update. Since this is expensive, and * all 4 top-level entries are used almost immediately in a * new process's life, we just pre-populate them here. * * Also, if we're in a paravirt environment where the kernel pmd is * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD #define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table * when PTI is enabled. We need them to map the per-process LDT into the * user-space page-table. */ #define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); /* Note: almost everything apart from _PAGE_PRESENT is reserved at the pmd (PDPT) level. */ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); /* * According to Intel App note "TLBs, Paging-Structure Caches, * and Their Invalidation", April 2007, document 317080-001, * section 8.1: in PAE mode we explicitly have to flush the * TLB via cr3 if the top-level pgd is changed... */ flush_tlb_mm(mm); } #else /* !CONFIG_X86_PAE */ /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 #define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 #define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; struct ptdesc *ptdesc; for (i = 0; i < count; i++) if (pmds[i]) { ptdesc = virt_to_ptdesc(pmds[i]); pagetable_pmd_dtor(ptdesc); pagetable_free(ptdesc); mm_dec_nr_pmds(mm); } } static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; gfp &= ~__GFP_HIGHMEM; for (i = 0; i < count; i++) { pmd_t *pmd = NULL; struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); if (!ptdesc) failed = true; if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { pagetable_free(ptdesc); ptdesc = NULL; failed = true; } if (ptdesc) { mm_inc_nr_pmds(mm); pmd = ptdesc_address(ptdesc); } pmds[i] = pmd; } if (failed) { free_pmds(mm, pmds, count); return -ENOMEM; } return 0; } /* * Mop up any pmd pages which may still be attached to the pgd. * Normally they will be freed by munmap/exit_mmap, but any pmd we * preallocate which never got a corresponding vma will need to be * freed manually. */ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) { pgd_t pgd = *pgdp; if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); pgd_clear(pgdp); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); mm_dec_nr_pmds(mm); } } static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; for (i = 0; i < PREALLOCATED_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i]); #ifdef CONFIG_PAGE_TABLE_ISOLATION if (!boot_cpu_has(X86_FEATURE_PTI)) return; pgdp = kernel_to_user_pgdp(pgdp); for (i = 0; i < PREALLOCATED_USER_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]); #endif } static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { p4d_t *p4d; pud_t *pud; int i; p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) { pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, pud, pmd); } } #ifdef CONFIG_PAGE_TABLE_ISOLATION static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir); pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); p4d_t *u_p4d; pud_t *u_pud; int i; u_p4d = p4d_offset(u_pgd, 0); u_pud = pud_offset(u_p4d, 0); s_pgd += KERNEL_PGD_BOUNDARY; u_pud += KERNEL_PGD_BOUNDARY; for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) { pmd_t *pmd = pmds[i]; memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, u_pud, pmd); } } #else static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { } #endif /* * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also * assumes that pgd should be in one page. * * But kernel with PAE paging that is not running as a Xen domain * only needs to allocate 32 bytes for pgd instead of one page. */ #ifdef CONFIG_X86_PAE #include <linux/slab.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #define PGD_ALIGN 32 static struct kmem_cache *pgd_cache; void __init pgtable_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use * shared kernel pmd. And this requires a whole page for pgd. */ if (!SHARED_KERNEL_PMD) return; /* * when PAE kernel is not running as a Xen domain, it uses * shared kernel pmd. Shared kernel pmd does not require a whole * page for pgd. We are able to just allocate a 32-byte for pgd. * During boot time, we create a 32-byte slab for pgd table allocation. */ pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, SLAB_PANIC, NULL); } static inline pgd_t *_pgd_alloc(void) { /* * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate * a 32-byte slab for pgd to save memory space. */ return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } static inline void _pgd_free(pgd_t *pgd) { if (!SHARED_KERNEL_PMD) free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); else kmem_cache_free(pgd_cache, pgd); } #else static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); if (pgd == NULL) goto out; mm->pgd = pgd; if (sizeof(pmds) != 0 && preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; if (sizeof(u_pmds) != 0 && preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; if (paravirt_pgd_alloc(mm) != 0) goto out_free_user_pmds; /* * Make sure that pre-populating the pmds is atomic with * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ spin_lock(&pgd_lock); pgd_ctor(mm, pgd); if (sizeof(pmds) != 0) pgd_prepopulate_pmd(mm, pgd, pmds); if (sizeof(u_pmds) != 0) pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; out_free_user_pmds: if (sizeof(u_pmds) != 0) free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: if (sizeof(pmds) != 0) free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(pgd); out: return NULL; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); _pgd_free(pgd); } /* * Used to set accessed or dirty bits in the page table entries * on other architectures. On x86, the accessed and dirty bits * are tracked by hardware. However, do_wp_page calls this function * to also make the pte writeable at the same time the dirty bit is * set. In that case we do actually need to write the PTE. */ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { int changed = !pte_same(*ptep, entry); if (changed && dirty) set_pte(ptep, entry); return changed; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ } return changed; } int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty) { int changed = !pud_same(*pudp, entry); VM_BUG_ON(address & ~HPAGE_PUD_MASK); if (changed && dirty) { set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ } return changed; } #endif int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { int ret = 0; if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); return ret; } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { int ret = 0; if (pmd_young(*pmdp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); return ret; } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { int ret = 0; if (pud_young(*pudp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pudp); return ret; } #endif int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { /* * On x86 CPUs, clearing the accessed bit without a TLB flush * doesn't cause data corruption. [ It could cause incorrect * page aging and the (mistaken) reclaim of hot pages, but the * chance of that should be relatively low. ] * * So as a performance optimization don't flush the TLB when * clearing the accessed bit, it will eventually be flushed by * a context switch or a VM operation anyway. [ In the rare * event of it not getting flushed for a long time the delay * shouldn't really matter because there's no real memory * pressure for swapout to react to. ] */ return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { /* * No flush is necessary. Once an invalid PTE is established, the PTE's * access and dirty bits cannot be updated. */ return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); } #endif /** * reserve_top_address - reserves a hole in the top of kernel address space * @reserve - size of hole to reserve * * Can be used to relocate the fixmap area and poke a hole in the top * of kernel address space to make room for a hypervisor. */ void __init reserve_top_address(unsigned long reserve) { #ifdef CONFIG_X86_32 BUG_ON(fixmaps_set > 0); __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE; printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n", -reserve, __FIXADDR_TOP + PAGE_SIZE); #endif } int fixmaps_set; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); #ifdef CONFIG_X86_64 /* * Ensure that the static initial page tables are covering the * fixmap completely. */ BUILD_BUG_ON(__end_of_permanent_fixed_addresses > (FIXMAP_PMD_NUM * PTRS_PER_PTE)); #endif if (idx >= __end_of_fixed_addresses) { BUG(); return; } set_pte_vaddr(address, pte); fixmaps_set++; } void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifdef CONFIG_X86_5LEVEL /** * p4d_set_huge - setup kernel P4D mapping * * No 512GB pages yet -- always return 0 */ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; } /** * p4d_clear_huge - clear kernel P4D mapping when it is set * * No 512GB pages yet -- always return 0 */ void p4d_clear_huge(p4d_t *p4d) { } #endif /** * pud_set_huge - setup kernel PUD mapping * * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this * function sets up a huge page only if the complete range has the same MTRR * caching mode. * * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger * page mapping attempt fails. * * Returns 1 on success and 0 on failure. */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); if (!uniform) return 0; /* Bail out if we are we on a populated non-leaf entry: */ if (pud_present(*pud) && !pud_huge(*pud)) return 0; set_pte((pte_t *)pud, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pmd_set_huge - setup kernel PMD mapping * * See text over pud_set_huge() above. * * Returns 1 on success and 0 on failure. */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); if (!uniform) { pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", __func__, addr, addr + PMD_SIZE); return 0; } /* Bail out if we are we on a populated non-leaf entry: */ if (pmd_present(*pmd) && !pmd_huge(*pmd)) return 0; set_pte((pte_t *)pmd, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pud_clear_huge - clear kernel PUD mapping when it is set * * Returns 1 on success and 0 on failure (no PUD map is found). */ int pud_clear_huge(pud_t *pud) { if (pud_large(*pud)) { pud_clear(pud); return 1; } return 0; } /** * pmd_clear_huge - clear kernel PMD mapping when it is set * * Returns 1 on success and 0 on failure (no PMD map is found). */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_large(*pmd)) { pmd_clear(pmd); return 1; } return 0; } #ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. * * NOTE: Callers must allow a single page allocation. */ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd, *pmd_sv; pte_t *pte; int i; pmd = pud_pgtable(*pud); pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); if (!pmd_sv) return 0; for (i = 0; i < PTRS_PER_PMD; i++) { pmd_sv[i] = pmd[i]; if (!pmd_none(pmd[i])) pmd_clear(&pmd[i]); } pud_clear(pud); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); free_page((unsigned long)pte); } } free_page((unsigned long)pmd_sv); pagetable_pmd_dtor(virt_to_ptdesc(pmd)); free_page((unsigned long)pmd); return 1; } /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); free_page((unsigned long)pte); return 1; } #else /* !CONFIG_X86_64 */ /* * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } #endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pte_mkwrite_shstk(pte); pte = pte_mkwrite_novma(pte); return pte_clear_saveddirty(pte); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pmd_mkwrite_shstk(pmd); pmd = pmd_mkwrite_novma(pmd); return pmd_clear_saveddirty(pmd); } void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) { /* * Hardware before shadow stack can (rarely) set Dirty=1 * on a Write=0 PTE. So the below condition * only indicates a software bug when shadow stack is * supported by the HW. This checking is covered in * pte_shstk(). */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pte_shstk(pte)); } void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd) { /* See note in arch_check_zapped_pte() */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pmd_shstk(pmd)); } |
58 77 77 77 58 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | /* * linux/drivers/video/fbcmap.c -- Colormap handling for frame buffer devices * * Created 15 Jun 1997 by Geert Uytterhoeven * * 2001 - Documented with DocBook * - Brad Douglas <brad@neruo.com> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. */ #include <linux/string.h> #include <linux/module.h> #include <linux/fb.h> #include <linux/slab.h> #include <linux/uaccess.h> static u16 red2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 green2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 blue2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 red4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 green4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 blue4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 red8[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; static u16 green8[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa }; static u16 blue8[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa }; static u16 red16[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x5555, 0x5555, 0x5555, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff }; static u16 green16[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa, 0x5555, 0x5555, 0xffff, 0xffff, 0x5555, 0x5555, 0xffff, 0xffff }; static u16 blue16[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff }; static const struct fb_cmap default_2_colors = { .len=2, .red=red2, .green=green2, .blue=blue2 }; static const struct fb_cmap default_8_colors = { .len=8, .red=red8, .green=green8, .blue=blue8 }; static const struct fb_cmap default_4_colors = { .len=4, .red=red4, .green=green4, .blue=blue4 }; static const struct fb_cmap default_16_colors = { .len=16, .red=red16, .green=green16, .blue=blue16 }; /** * fb_alloc_cmap_gfp - allocate a colormap * @cmap: frame buffer colormap structure * @len: length of @cmap * @transp: boolean, 1 if there is transparency, 0 otherwise * @flags: flags for kmalloc memory allocation * * Allocates memory for a colormap @cmap. @len is the * number of entries in the palette. * * Returns negative errno on error, or zero on success. * */ int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags) { int size = len * sizeof(u16); int ret = -ENOMEM; flags |= __GFP_NOWARN; if (cmap->len != len) { fb_dealloc_cmap(cmap); if (!len) return 0; cmap->red = kzalloc(size, flags); if (!cmap->red) goto fail; cmap->green = kzalloc(size, flags); if (!cmap->green) goto fail; cmap->blue = kzalloc(size, flags); if (!cmap->blue) goto fail; if (transp) { cmap->transp = kzalloc(size, flags); if (!cmap->transp) goto fail; } else { cmap->transp = NULL; } } cmap->start = 0; cmap->len = len; ret = fb_copy_cmap(fb_default_cmap(len), cmap); if (ret) goto fail; return 0; fail: fb_dealloc_cmap(cmap); return ret; } int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp) { return fb_alloc_cmap_gfp(cmap, len, transp, GFP_ATOMIC); } /** * fb_dealloc_cmap - deallocate a colormap * @cmap: frame buffer colormap structure * * Deallocates a colormap that was previously allocated with * fb_alloc_cmap(). * */ void fb_dealloc_cmap(struct fb_cmap *cmap) { kfree(cmap->red); kfree(cmap->green); kfree(cmap->blue); kfree(cmap->transp); cmap->red = cmap->green = cmap->blue = cmap->transp = NULL; cmap->len = 0; } /** * fb_copy_cmap - copy a colormap * @from: frame buffer colormap structure * @to: frame buffer colormap structure * * Copy contents of colormap from @from to @to. */ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); memcpy(to->red+tooff, from->red+fromoff, size); memcpy(to->green+tooff, from->green+fromoff, size); memcpy(to->blue+tooff, from->blue+fromoff, size); if (from->transp && to->transp) memcpy(to->transp+tooff, from->transp+fromoff, size); return 0; } int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); if (copy_to_user(to->red+tooff, from->red+fromoff, size)) return -EFAULT; if (copy_to_user(to->green+tooff, from->green+fromoff, size)) return -EFAULT; if (copy_to_user(to->blue+tooff, from->blue+fromoff, size)) return -EFAULT; if (from->transp && to->transp) if (copy_to_user(to->transp+tooff, from->transp+fromoff, size)) return -EFAULT; return 0; } /** * fb_set_cmap - set the colormap * @cmap: frame buffer colormap structure * @info: frame buffer info structure * * Sets the colormap @cmap for a screen of device @info. * * Returns negative errno on error, or zero on success. * */ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info) { int i, start, rc = 0; u16 *red, *green, *blue, *transp; u_int hred, hgreen, hblue, htransp = 0xffff; red = cmap->red; green = cmap->green; blue = cmap->blue; transp = cmap->transp; start = cmap->start; if (start < 0 || (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap)) return -EINVAL; if (info->fbops->fb_setcmap) { rc = info->fbops->fb_setcmap(cmap, info); } else { for (i = 0; i < cmap->len; i++) { hred = *red++; hgreen = *green++; hblue = *blue++; if (transp) htransp = *transp++; if (info->fbops->fb_setcolreg(start++, hred, hgreen, hblue, htransp, info)) break; } } if (rc == 0) fb_copy_cmap(cmap, &info->cmap); return rc; } int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) { int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; if (size < 0 || size < cmap->len) return -E2BIG; memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap_gfp(&umap, cmap->len, cmap->transp != NULL, GFP_KERNEL); if (rc) return rc; if (copy_from_user(umap.red, cmap->red, size) || copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { rc = -EFAULT; goto out; } umap.start = cmap->start; lock_fb_info(info); rc = fb_set_cmap(&umap, info); unlock_fb_info(info); out: fb_dealloc_cmap(&umap); return rc; } /** * fb_default_cmap - get default colormap * @len: size of palette for a depth * * Gets the default colormap for a specific screen depth. @len * is the size of the palette for a particular screen depth. * * Returns pointer to a frame buffer colormap structure. * */ const struct fb_cmap *fb_default_cmap(int len) { if (len <= 2) return &default_2_colors; if (len <= 4) return &default_4_colors; if (len <= 8) return &default_8_colors; return &default_16_colors; } /** * fb_invert_cmaps - invert all defaults colormaps * * Invert all default colormaps. * */ void fb_invert_cmaps(void) { u_int i; for (i = 0; i < ARRAY_SIZE(red2); i++) { red2[i] = ~red2[i]; green2[i] = ~green2[i]; blue2[i] = ~blue2[i]; } for (i = 0; i < ARRAY_SIZE(red4); i++) { red4[i] = ~red4[i]; green4[i] = ~green4[i]; blue4[i] = ~blue4[i]; } for (i = 0; i < ARRAY_SIZE(red8); i++) { red8[i] = ~red8[i]; green8[i] = ~green8[i]; blue8[i] = ~blue8[i]; } for (i = 0; i < ARRAY_SIZE(red16); i++) { red16[i] = ~red16[i]; green16[i] = ~green16[i]; blue16[i] = ~blue16[i]; } } /* * Visible symbols for modules */ EXPORT_SYMBOL(fb_alloc_cmap); EXPORT_SYMBOL(fb_dealloc_cmap); EXPORT_SYMBOL(fb_copy_cmap); EXPORT_SYMBOL(fb_set_cmap); EXPORT_SYMBOL(fb_default_cmap); EXPORT_SYMBOL(fb_invert_cmaps); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2012 Red Hat. All rights reserved. */ #ifndef BTRFS_RCU_STRING_H #define BTRFS_RCU_STRING_H struct rcu_string { struct rcu_head rcu; char str[]; }; static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) { size_t len = strlen(src) + 1; struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + (len * sizeof(char)), mask); if (!ret) return ret; /* Warn if the source got unexpectedly truncated. */ if (WARN_ON(strscpy(ret->str, src, len) < 0)) { kfree(ret); return NULL; } return ret; } static inline void rcu_string_free(struct rcu_string *str) { if (str) kfree_rcu(str, rcu); } #define printk_in_rcu(fmt, ...) do { \ rcu_read_lock(); \ printk(fmt, __VA_ARGS__); \ rcu_read_unlock(); \ } while (0) #define printk_ratelimited_in_rcu(fmt, ...) do { \ rcu_read_lock(); \ printk_ratelimited(fmt, __VA_ARGS__); \ rcu_read_unlock(); \ } while (0) #define rcu_str_deref(rcu_str) ({ \ struct rcu_string *__str = rcu_dereference(rcu_str); \ __str->str; \ }) #endif |
3 1 3 1 6 1 2 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 | // SPDX-License-Identifier: GPL-2.0-or-later /* ----------------------------------------------------------------------- * * * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved * Copyright 2009 Intel Corporation; author: H. Peter Anvin * * ----------------------------------------------------------------------- */ /* * x86 MSR access device * * This device is accessed by lseek() to the appropriate register number * and then read/write in chunks of 8 bytes. A larger size means multiple * reads or writes of the same register. * * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on * an SMP box will direct the access to CPU %d. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/smp.h> #include <linux/major.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/uaccess.h> #include <linux/gfp.h> #include <linux/security.h> #include <asm/cpufeature.h> #include <asm/msr.h> static enum cpuhp_state cpuhp_msr_state; enum allow_write_msrs { MSR_WRITES_ON, MSR_WRITES_OFF, MSR_WRITES_DEFAULT, }; static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT; static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; u32 reg = *ppos; int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); if (err) break; if (copy_to_user(tmp, &data, 8)) { err = -EFAULT; break; } tmp += 2; bytes += 8; } return bytes ? bytes : err; } static int filter_write(u32 reg) { /* * MSRs writes usually happen all at once, and can easily saturate kmsg. * Only allow one message every 30 seconds. * * It's possible to be smarter here and do it (for example) per-MSR, but * it would certainly be more complex, and this is enough at least to * avoid saturating the ring buffer. */ static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1); switch (allow_writes) { case MSR_WRITES_ON: return 0; case MSR_WRITES_OFF: return -EPERM; default: break; } if (!__ratelimit(&fw_rs)) return 0; pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n", reg, current->comm, current->pid); pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n"); return 0; } static ssize_t msr_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { const u32 __user *tmp = (const u32 __user *)buf; u32 data[2]; u32 reg = *ppos; int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; err = security_locked_down(LOCKDOWN_MSR); if (err) return err; err = filter_write(reg); if (err) return err; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { if (copy_from_user(&data, tmp, 8)) { err = -EFAULT; break; } add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); if (err) break; tmp += 2; bytes += 8; } return bytes ? bytes : err; } static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) { u32 __user *uregs = (u32 __user *)arg; u32 regs[8]; int cpu = iminor(file_inode(file)); int err; switch (ioc) { case X86_IOC_RDMSR_REGS: if (!(file->f_mode & FMODE_READ)) { err = -EBADF; break; } if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = rdmsr_safe_regs_on_cpu(cpu, regs); if (err) break; if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; case X86_IOC_WRMSR_REGS: if (!(file->f_mode & FMODE_WRITE)) { err = -EBADF; break; } if (copy_from_user(®s, uregs, sizeof(regs))) { err = -EFAULT; break; } err = security_locked_down(LOCKDOWN_MSR); if (err) break; err = filter_write(regs[1]); if (err) return err; add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; if (copy_to_user(uregs, ®s, sizeof(regs))) err = -EFAULT; break; default: err = -ENOTTY; break; } return err; } static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file_inode(file)); struct cpuinfo_x86 *c; if (!capable(CAP_SYS_RAWIO)) return -EPERM; if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) return -EIO; /* MSR not supported */ return 0; } /* * File operations we support */ static const struct file_operations msr_fops = { .owner = THIS_MODULE, .llseek = no_seek_end_llseek, .read = msr_read, .write = msr_write, .open = msr_open, .unlocked_ioctl = msr_ioctl, .compat_ioctl = msr_ioctl, }; static char *msr_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); } static const struct class msr_class = { .name = "msr", .devnode = msr_devnode, }; static int msr_device_create(unsigned int cpu) { struct device *dev; dev = device_create(&msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, "msr%d", cpu); return PTR_ERR_OR_ZERO(dev); } static int msr_device_destroy(unsigned int cpu) { device_destroy(&msr_class, MKDEV(MSR_MAJOR, cpu)); return 0; } static int __init msr_init(void) { int err; if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { pr_err("unable to get major %d for msr\n", MSR_MAJOR); return -EBUSY; } err = class_register(&msr_class); if (err) goto out_chrdev; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/msr:online", msr_device_create, msr_device_destroy); if (err < 0) goto out_class; cpuhp_msr_state = err; return 0; out_class: class_unregister(&msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); return err; } module_init(msr_init); static void __exit msr_exit(void) { cpuhp_remove_state(cpuhp_msr_state); class_unregister(&msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); } module_exit(msr_exit) static int set_allow_writes(const char *val, const struct kernel_param *cp) { /* val is NUL-terminated, see kernfs_fop_write() */ char *s = strstrip((char *)val); if (!strcmp(s, "on")) allow_writes = MSR_WRITES_ON; else if (!strcmp(s, "off")) allow_writes = MSR_WRITES_OFF; else allow_writes = MSR_WRITES_DEFAULT; return 0; } static int get_allow_writes(char *buf, const struct kernel_param *kp) { const char *res; switch (allow_writes) { case MSR_WRITES_ON: res = "on"; break; case MSR_WRITES_OFF: res = "off"; break; default: res = "default"; break; } return sprintf(buf, "%s\n", res); } static const struct kernel_param_ops allow_writes_ops = { .set = set_allow_writes, .get = get_allow_writes }; module_param_cb(allow_writes, &allow_writes_ops, NULL, 0600); MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); MODULE_DESCRIPTION("x86 generic MSR driver"); MODULE_LICENSE("GPL"); |
4 1 4 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | /* * linux/fs/nls/nls_cp737.c * * Charset cp737 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, /* 0x90*/ 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, /* 0xa0*/ 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x03c3, 0x03c2, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03c9, 0x03ac, 0x03ad, 0x03ae, 0x03ca, 0x03af, 0x03cc, 0x03cd, 0x03cb, 0x03ce, 0x0386, 0x0388, 0x0389, 0x038a, 0x038c, 0x038e, /* 0xf0*/ 0x038f, 0x00b1, 0x2265, 0x2264, 0x03aa, 0x03ab, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0x00, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, 0x00, /* 0x80-0x87 */ 0xeb, 0xec, 0xed, 0x00, 0xee, 0x00, 0xef, 0xf0, /* 0x88-0x8f */ 0x00, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, /* 0x90-0x97 */ 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, /* 0x98-0x9f */ 0x8f, 0x90, 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, /* 0xa0-0xa7 */ 0x96, 0x97, 0xf4, 0xf5, 0xe1, 0xe2, 0xe3, 0xe5, /* 0xa8-0xaf */ 0x00, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, /* 0xb0-0xb7 */ 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, /* 0xb8-0xbf */ 0xa7, 0xa8, 0xaa, 0xa9, 0xab, 0xac, 0xad, 0xae, /* 0xc0-0xc7 */ 0xaf, 0xe0, 0xe4, 0xe8, 0xe6, 0xe7, 0xe9, 0x00, /* 0xc8-0xcf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, NULL, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x80-0x87 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x88-0x8f */ 0xa8, 0xa9, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xe0, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xe1, 0xe2, 0xe3, 0xe5, 0xe6, 0xe7, /* 0xe8-0xef */ 0xe9, 0xf1, 0xf2, 0xf3, 0xe4, 0xe8, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x98-0x9f */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0xa0-0xa7 */ 0x90, 0x91, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x97, 0xea, 0xeb, 0xec, 0xf4, 0xed, 0xee, 0xef, /* 0xe0-0xe7 */ 0xf5, 0xf0, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp737", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp737(void) { return register_nls(&table); } static void __exit exit_nls_cp737(void) { unregister_nls(&table); } module_init(init_nls_cp737) module_exit(exit_nls_cp737) MODULE_LICENSE("Dual BSD/GPL"); |
793 819 1 42 42 42 8 8 1 195 194 194 13 13 2 1 9 10 8 8 2 6 6 26 25 26 26 757 755 754 2 2 1 1 1 42 42 42 42 42 8 8 3 5 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Video capture interface for Linux version 2 * * A generic video device interface for the LINUX operating system * using a set of device structures/vectors for low level operations. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1) * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2) * * Fixes: 20000516 Claudio Matsuoka <claudio@conectiva.com> * - Added procfs support */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/debugfs.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <media/v4l2-common.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-event.h> #define VIDEO_NUM_DEVICES 256 #define VIDEO_NAME "video4linux" #define dprintk(fmt, arg...) do { \ printk(KERN_DEBUG pr_fmt("%s: " fmt), \ __func__, ##arg); \ } while (0) /* * sysfs stuff */ static ssize_t index_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%i\n", vdev->index); } static DEVICE_ATTR_RO(index); static ssize_t dev_debug_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%i\n", vdev->dev_debug); } static ssize_t dev_debug_store(struct device *cd, struct device_attribute *attr, const char *buf, size_t len) { struct video_device *vdev = to_video_device(cd); int res = 0; u16 value; res = kstrtou16(buf, 0, &value); if (res) return res; vdev->dev_debug = value; return len; } static DEVICE_ATTR_RW(dev_debug); static ssize_t name_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%.*s\n", (int)sizeof(vdev->name), vdev->name); } static DEVICE_ATTR_RO(name); static struct attribute *video_device_attrs[] = { &dev_attr_name.attr, &dev_attr_dev_debug.attr, &dev_attr_index.attr, NULL, }; ATTRIBUTE_GROUPS(video_device); /* * Active devices */ static struct video_device *video_devices[VIDEO_NUM_DEVICES]; static DEFINE_MUTEX(videodev_lock); static DECLARE_BITMAP(devnode_nums[VFL_TYPE_MAX], VIDEO_NUM_DEVICES); /* Device node utility functions */ /* Note: these utility functions all assume that vfl_type is in the range [0, VFL_TYPE_MAX-1]. */ #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* Return the bitmap corresponding to vfl_type. */ static inline unsigned long *devnode_bits(enum vfl_devnode_type vfl_type) { /* Any types not assigned to fixed minor ranges must be mapped to one single bitmap for the purposes of finding a free node number since all those unassigned types use the same minor range. */ int idx = (vfl_type > VFL_TYPE_RADIO) ? VFL_TYPE_MAX - 1 : vfl_type; return devnode_nums[idx]; } #else /* Return the bitmap corresponding to vfl_type. */ static inline unsigned long *devnode_bits(enum vfl_devnode_type vfl_type) { return devnode_nums[vfl_type]; } #endif /* Mark device node number vdev->num as used */ static inline void devnode_set(struct video_device *vdev) { set_bit(vdev->num, devnode_bits(vdev->vfl_type)); } /* Mark device node number vdev->num as unused */ static inline void devnode_clear(struct video_device *vdev) { clear_bit(vdev->num, devnode_bits(vdev->vfl_type)); } /* Try to find a free device node number in the range [from, to> */ static inline int devnode_find(struct video_device *vdev, int from, int to) { return find_next_zero_bit(devnode_bits(vdev->vfl_type), to, from); } struct video_device *video_device_alloc(void) { return kzalloc(sizeof(struct video_device), GFP_KERNEL); } EXPORT_SYMBOL(video_device_alloc); void video_device_release(struct video_device *vdev) { kfree(vdev); } EXPORT_SYMBOL(video_device_release); void video_device_release_empty(struct video_device *vdev) { /* Do nothing */ /* Only valid when the video_device struct is a static. */ } EXPORT_SYMBOL(video_device_release_empty); static inline void video_get(struct video_device *vdev) { get_device(&vdev->dev); } static inline void video_put(struct video_device *vdev) { put_device(&vdev->dev); } /* Called when the last user of the video device exits. */ static void v4l2_device_release(struct device *cd) { struct video_device *vdev = to_video_device(cd); struct v4l2_device *v4l2_dev = vdev->v4l2_dev; mutex_lock(&videodev_lock); if (WARN_ON(video_devices[vdev->minor] != vdev)) { /* should not happen */ mutex_unlock(&videodev_lock); return; } /* Free up this device for reuse */ video_devices[vdev->minor] = NULL; /* Delete the cdev on this minor as well */ cdev_del(vdev->cdev); /* Just in case some driver tries to access this from the release() callback. */ vdev->cdev = NULL; /* Mark device node number as free */ devnode_clear(vdev); mutex_unlock(&videodev_lock); #if defined(CONFIG_MEDIA_CONTROLLER) if (v4l2_dev->mdev && vdev->vfl_dir != VFL_DIR_M2M) { /* Remove interfaces and interface links */ media_devnode_remove(vdev->intf_devnode); if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) media_device_unregister_entity(&vdev->entity); } #endif /* Do not call v4l2_device_put if there is no release callback set. * Drivers that have no v4l2_device release callback might free the * v4l2_dev instance in the video_device release callback below, so we * must perform this check here. * * TODO: In the long run all drivers that use v4l2_device should use the * v4l2_device release callback. This check will then be unnecessary. */ if (v4l2_dev->release == NULL) v4l2_dev = NULL; /* Release video_device and perform other cleanups as needed. */ vdev->release(vdev); /* Decrease v4l2_device refcount */ if (v4l2_dev) v4l2_device_put(v4l2_dev); } static struct class video_class = { .name = VIDEO_NAME, .dev_groups = video_device_groups, }; struct video_device *video_devdata(struct file *file) { return video_devices[iminor(file_inode(file))]; } EXPORT_SYMBOL(video_devdata); /* Priority handling */ static inline bool prio_is_valid(enum v4l2_priority prio) { return prio == V4L2_PRIORITY_BACKGROUND || prio == V4L2_PRIORITY_INTERACTIVE || prio == V4L2_PRIORITY_RECORD; } void v4l2_prio_init(struct v4l2_prio_state *global) { memset(global, 0, sizeof(*global)); } EXPORT_SYMBOL(v4l2_prio_init); int v4l2_prio_change(struct v4l2_prio_state *global, enum v4l2_priority *local, enum v4l2_priority new) { if (!prio_is_valid(new)) return -EINVAL; if (*local == new) return 0; atomic_inc(&global->prios[new]); if (prio_is_valid(*local)) atomic_dec(&global->prios[*local]); *local = new; return 0; } EXPORT_SYMBOL(v4l2_prio_change); void v4l2_prio_open(struct v4l2_prio_state *global, enum v4l2_priority *local) { v4l2_prio_change(global, local, V4L2_PRIORITY_DEFAULT); } EXPORT_SYMBOL(v4l2_prio_open); void v4l2_prio_close(struct v4l2_prio_state *global, enum v4l2_priority local) { if (prio_is_valid(local)) atomic_dec(&global->prios[local]); } EXPORT_SYMBOL(v4l2_prio_close); enum v4l2_priority v4l2_prio_max(struct v4l2_prio_state *global) { if (atomic_read(&global->prios[V4L2_PRIORITY_RECORD]) > 0) return V4L2_PRIORITY_RECORD; if (atomic_read(&global->prios[V4L2_PRIORITY_INTERACTIVE]) > 0) return V4L2_PRIORITY_INTERACTIVE; if (atomic_read(&global->prios[V4L2_PRIORITY_BACKGROUND]) > 0) return V4L2_PRIORITY_BACKGROUND; return V4L2_PRIORITY_UNSET; } EXPORT_SYMBOL(v4l2_prio_max); int v4l2_prio_check(struct v4l2_prio_state *global, enum v4l2_priority local) { return (local < v4l2_prio_max(global)) ? -EBUSY : 0; } EXPORT_SYMBOL(v4l2_prio_check); static ssize_t v4l2_read(struct file *filp, char __user *buf, size_t sz, loff_t *off) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->read) return -EINVAL; if (video_is_registered(vdev)) ret = vdev->fops->read(filp, buf, sz, off); if ((vdev->dev_debug & V4L2_DEV_DEBUG_FOP) && (vdev->dev_debug & V4L2_DEV_DEBUG_STREAMING)) dprintk("%s: read: %zd (%d)\n", video_device_node_name(vdev), sz, ret); return ret; } static ssize_t v4l2_write(struct file *filp, const char __user *buf, size_t sz, loff_t *off) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->write) return -EINVAL; if (video_is_registered(vdev)) ret = vdev->fops->write(filp, buf, sz, off); if ((vdev->dev_debug & V4L2_DEV_DEBUG_FOP) && (vdev->dev_debug & V4L2_DEV_DEBUG_STREAMING)) dprintk("%s: write: %zd (%d)\n", video_device_node_name(vdev), sz, ret); return ret; } static __poll_t v4l2_poll(struct file *filp, struct poll_table_struct *poll) { struct video_device *vdev = video_devdata(filp); __poll_t res = EPOLLERR | EPOLLHUP | EPOLLPRI; if (video_is_registered(vdev)) { if (!vdev->fops->poll) res = DEFAULT_POLLMASK; else res = vdev->fops->poll(filp, poll); } if (vdev->dev_debug & V4L2_DEV_DEBUG_POLL) dprintk("%s: poll: %08x %08x\n", video_device_node_name(vdev), res, poll_requested_events(poll)); return res; } static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (vdev->fops->unlocked_ioctl) { if (video_is_registered(vdev)) ret = vdev->fops->unlocked_ioctl(filp, cmd, arg); } else ret = -ENOTTY; return ret; } #ifdef CONFIG_MMU #define v4l2_get_unmapped_area NULL #else static unsigned long v4l2_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct video_device *vdev = video_devdata(filp); int ret; if (!vdev->fops->get_unmapped_area) return -ENOSYS; if (!video_is_registered(vdev)) return -ENODEV; ret = vdev->fops->get_unmapped_area(filp, addr, len, pgoff, flags); if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: get_unmapped_area (%d)\n", video_device_node_name(vdev), ret); return ret; } #endif static int v4l2_mmap(struct file *filp, struct vm_area_struct *vm) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->mmap) return -ENODEV; if (video_is_registered(vdev)) ret = vdev->fops->mmap(filp, vm); if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: mmap (%d)\n", video_device_node_name(vdev), ret); return ret; } /* Override for the open function */ static int v4l2_open(struct inode *inode, struct file *filp) { struct video_device *vdev; int ret = 0; /* Check if the video device is available */ mutex_lock(&videodev_lock); vdev = video_devdata(filp); /* return ENODEV if the video device has already been removed. */ if (vdev == NULL || !video_is_registered(vdev)) { mutex_unlock(&videodev_lock); return -ENODEV; } /* and increase the device refcount */ video_get(vdev); mutex_unlock(&videodev_lock); if (vdev->fops->open) { if (video_is_registered(vdev)) ret = vdev->fops->open(filp); else ret = -ENODEV; } if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: open (%d)\n", video_device_node_name(vdev), ret); /* decrease the refcount in case of an error */ if (ret) video_put(vdev); return ret; } /* Override for the release function */ static int v4l2_release(struct inode *inode, struct file *filp) { struct video_device *vdev = video_devdata(filp); int ret = 0; /* * We need to serialize the release() with queueing new requests. * The release() may trigger the cancellation of a streaming * operation, and that should not be mixed with queueing a new * request at the same time. */ if (vdev->fops->release) { if (v4l2_device_supports_requests(vdev->v4l2_dev)) { mutex_lock(&vdev->v4l2_dev->mdev->req_queue_mutex); ret = vdev->fops->release(filp); mutex_unlock(&vdev->v4l2_dev->mdev->req_queue_mutex); } else { ret = vdev->fops->release(filp); } } if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: release\n", video_device_node_name(vdev)); /* decrease the refcount unconditionally since the release() return value is ignored. */ video_put(vdev); return ret; } static const struct file_operations v4l2_fops = { .owner = THIS_MODULE, .read = v4l2_read, .write = v4l2_write, .open = v4l2_open, .get_unmapped_area = v4l2_get_unmapped_area, .mmap = v4l2_mmap, .unlocked_ioctl = v4l2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = v4l2_compat_ioctl32, #endif .release = v4l2_release, .poll = v4l2_poll, .llseek = no_llseek, }; /** * get_index - assign stream index number based on v4l2_dev * @vdev: video_device to assign index number to, vdev->v4l2_dev should be assigned * * Note that when this is called the new device has not yet been registered * in the video_device array, but it was able to obtain a minor number. * * This means that we can always obtain a free stream index number since * the worst case scenario is that there are VIDEO_NUM_DEVICES - 1 slots in * use of the video_device array. * * Returns a free index number. */ static int get_index(struct video_device *vdev) { /* This can be static since this function is called with the global videodev_lock held. */ static DECLARE_BITMAP(used, VIDEO_NUM_DEVICES); int i; bitmap_zero(used, VIDEO_NUM_DEVICES); for (i = 0; i < VIDEO_NUM_DEVICES; i++) { if (video_devices[i] != NULL && video_devices[i]->v4l2_dev == vdev->v4l2_dev) { __set_bit(video_devices[i]->index, used); } } return find_first_zero_bit(used, VIDEO_NUM_DEVICES); } #define SET_VALID_IOCTL(ops, cmd, op) \ do { if ((ops)->op) __set_bit(_IOC_NR(cmd), valid_ioctls); } while (0) /* This determines which ioctls are actually implemented in the driver. It's a one-time thing which simplifies video_ioctl2 as it can just do a bit test. Note that drivers can override this by setting bits to 1 in vdev->valid_ioctls. If an ioctl is marked as 1 when this function is called, then that ioctl will actually be marked as unimplemented. It does that by first setting up the local valid_ioctls bitmap, and at the end do a: vdev->valid_ioctls = valid_ioctls & ~(vdev->valid_ioctls) */ static void determine_valid_ioctls(struct video_device *vdev) { const u32 vid_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_CAPTURE_MPLANE | V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_VIDEO_OUTPUT_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_CAP_VIDEO_M2M_MPLANE; const u32 meta_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_META_OUTPUT; DECLARE_BITMAP(valid_ioctls, BASE_VIDIOC_PRIVATE); const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; bool is_vid = vdev->vfl_type == VFL_TYPE_VIDEO && (vdev->device_caps & vid_caps); bool is_vbi = vdev->vfl_type == VFL_TYPE_VBI; bool is_radio = vdev->vfl_type == VFL_TYPE_RADIO; bool is_sdr = vdev->vfl_type == VFL_TYPE_SDR; bool is_tch = vdev->vfl_type == VFL_TYPE_TOUCH; bool is_meta = vdev->vfl_type == VFL_TYPE_VIDEO && (vdev->device_caps & meta_caps); bool is_rx = vdev->vfl_dir != VFL_DIR_TX; bool is_tx = vdev->vfl_dir != VFL_DIR_RX; bool is_io_mc = vdev->device_caps & V4L2_CAP_IO_MC; bool has_streaming = vdev->device_caps & V4L2_CAP_STREAMING; bitmap_zero(valid_ioctls, BASE_VIDIOC_PRIVATE); /* vfl_type and vfl_dir independent ioctls */ SET_VALID_IOCTL(ops, VIDIOC_QUERYCAP, vidioc_querycap); __set_bit(_IOC_NR(VIDIOC_G_PRIORITY), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_PRIORITY), valid_ioctls); /* Note: the control handler can also be passed through the filehandle, and that can't be tested here. If the bit for these control ioctls is set, then the ioctl is valid. But if it is 0, then it can still be valid if the filehandle passed the control handler. */ if (vdev->ctrl_handler || ops->vidioc_queryctrl) __set_bit(_IOC_NR(VIDIOC_QUERYCTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_query_ext_ctrl) __set_bit(_IOC_NR(VIDIOC_QUERY_EXT_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_g_ctrl || ops->vidioc_g_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_G_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_s_ctrl || ops->vidioc_s_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_S_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_g_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_G_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_s_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_S_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_try_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_TRY_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_querymenu) __set_bit(_IOC_NR(VIDIOC_QUERYMENU), valid_ioctls); if (!is_tch) { SET_VALID_IOCTL(ops, VIDIOC_G_FREQUENCY, vidioc_g_frequency); SET_VALID_IOCTL(ops, VIDIOC_S_FREQUENCY, vidioc_s_frequency); } SET_VALID_IOCTL(ops, VIDIOC_LOG_STATUS, vidioc_log_status); #ifdef CONFIG_VIDEO_ADV_DEBUG __set_bit(_IOC_NR(VIDIOC_DBG_G_CHIP_INFO), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_DBG_G_REGISTER), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_DBG_S_REGISTER), valid_ioctls); #endif /* yes, really vidioc_subscribe_event */ SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_UNSUBSCRIBE_EVENT, vidioc_unsubscribe_event); if (ops->vidioc_enum_freq_bands || ops->vidioc_g_tuner || ops->vidioc_g_modulator) __set_bit(_IOC_NR(VIDIOC_ENUM_FREQ_BANDS), valid_ioctls); if (is_vid) { /* video specific ioctls */ if ((is_rx && (ops->vidioc_enum_fmt_vid_cap || ops->vidioc_enum_fmt_vid_overlay)) || (is_tx && ops->vidioc_enum_fmt_vid_out)) __set_bit(_IOC_NR(VIDIOC_ENUM_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_g_fmt_vid_cap || ops->vidioc_g_fmt_vid_cap_mplane || ops->vidioc_g_fmt_vid_overlay)) || (is_tx && (ops->vidioc_g_fmt_vid_out || ops->vidioc_g_fmt_vid_out_mplane || ops->vidioc_g_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_G_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_s_fmt_vid_cap || ops->vidioc_s_fmt_vid_cap_mplane || ops->vidioc_s_fmt_vid_overlay)) || (is_tx && (ops->vidioc_s_fmt_vid_out || ops->vidioc_s_fmt_vid_out_mplane || ops->vidioc_s_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_S_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_try_fmt_vid_cap || ops->vidioc_try_fmt_vid_cap_mplane || ops->vidioc_try_fmt_vid_overlay)) || (is_tx && (ops->vidioc_try_fmt_vid_out || ops->vidioc_try_fmt_vid_out_mplane || ops->vidioc_try_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_TRY_FMT), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_OVERLAY, vidioc_overlay); SET_VALID_IOCTL(ops, VIDIOC_G_FBUF, vidioc_g_fbuf); SET_VALID_IOCTL(ops, VIDIOC_S_FBUF, vidioc_s_fbuf); SET_VALID_IOCTL(ops, VIDIOC_G_JPEGCOMP, vidioc_g_jpegcomp); SET_VALID_IOCTL(ops, VIDIOC_S_JPEGCOMP, vidioc_s_jpegcomp); SET_VALID_IOCTL(ops, VIDIOC_G_ENC_INDEX, vidioc_g_enc_index); SET_VALID_IOCTL(ops, VIDIOC_ENCODER_CMD, vidioc_encoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_TRY_ENCODER_CMD, vidioc_try_encoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_DECODER_CMD, vidioc_decoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_TRY_DECODER_CMD, vidioc_try_decoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMESIZES, vidioc_enum_framesizes); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMEINTERVALS, vidioc_enum_frameintervals); if (ops->vidioc_g_selection && !test_bit(_IOC_NR(VIDIOC_G_SELECTION), vdev->valid_ioctls)) { __set_bit(_IOC_NR(VIDIOC_G_CROP), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_CROPCAP), valid_ioctls); } if (ops->vidioc_s_selection && !test_bit(_IOC_NR(VIDIOC_S_SELECTION), vdev->valid_ioctls)) __set_bit(_IOC_NR(VIDIOC_S_CROP), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_G_SELECTION, vidioc_g_selection); SET_VALID_IOCTL(ops, VIDIOC_S_SELECTION, vidioc_s_selection); } if (is_meta && is_rx) { /* metadata capture specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_meta_cap); } else if (is_meta && is_tx) { /* metadata output specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_meta_out); } if (is_vbi) { /* vbi specific ioctls */ if ((is_rx && (ops->vidioc_g_fmt_vbi_cap || ops->vidioc_g_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_g_fmt_vbi_out || ops->vidioc_g_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_G_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_s_fmt_vbi_cap || ops->vidioc_s_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_s_fmt_vbi_out || ops->vidioc_s_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_S_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_try_fmt_vbi_cap || ops->vidioc_try_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_try_fmt_vbi_out || ops->vidioc_try_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_TRY_FMT), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_G_SLICED_VBI_CAP, vidioc_g_sliced_vbi_cap); } else if (is_tch) { /* touch specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMESIZES, vidioc_enum_framesizes); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMEINTERVALS, vidioc_enum_frameintervals); SET_VALID_IOCTL(ops, VIDIOC_ENUMINPUT, vidioc_enum_input); SET_VALID_IOCTL(ops, VIDIOC_G_INPUT, vidioc_g_input); SET_VALID_IOCTL(ops, VIDIOC_S_INPUT, vidioc_s_input); SET_VALID_IOCTL(ops, VIDIOC_G_PARM, vidioc_g_parm); SET_VALID_IOCTL(ops, VIDIOC_S_PARM, vidioc_s_parm); } else if (is_sdr && is_rx) { /* SDR receiver specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_sdr_cap); } else if (is_sdr && is_tx) { /* SDR transmitter specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_sdr_out); } if (has_streaming) { /* ioctls valid for streaming I/O */ SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, vidioc_reqbufs); SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf); SET_VALID_IOCTL(ops, VIDIOC_QBUF, vidioc_qbuf); SET_VALID_IOCTL(ops, VIDIOC_EXPBUF, vidioc_expbuf); SET_VALID_IOCTL(ops, VIDIOC_DQBUF, vidioc_dqbuf); SET_VALID_IOCTL(ops, VIDIOC_CREATE_BUFS, vidioc_create_bufs); SET_VALID_IOCTL(ops, VIDIOC_PREPARE_BUF, vidioc_prepare_buf); SET_VALID_IOCTL(ops, VIDIOC_STREAMON, vidioc_streamon); SET_VALID_IOCTL(ops, VIDIOC_STREAMOFF, vidioc_streamoff); } if (is_vid || is_vbi || is_meta) { /* ioctls valid for video, vbi and metadata */ if (ops->vidioc_s_std) __set_bit(_IOC_NR(VIDIOC_ENUMSTD), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_S_STD, vidioc_s_std); SET_VALID_IOCTL(ops, VIDIOC_G_STD, vidioc_g_std); if (is_rx) { SET_VALID_IOCTL(ops, VIDIOC_QUERYSTD, vidioc_querystd); if (is_io_mc) { __set_bit(_IOC_NR(VIDIOC_ENUMINPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_G_INPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_INPUT), valid_ioctls); } else { SET_VALID_IOCTL(ops, VIDIOC_ENUMINPUT, vidioc_enum_input); SET_VALID_IOCTL(ops, VIDIOC_G_INPUT, vidioc_g_input); SET_VALID_IOCTL(ops, VIDIOC_S_INPUT, vidioc_s_input); } SET_VALID_IOCTL(ops, VIDIOC_ENUMAUDIO, vidioc_enumaudio); SET_VALID_IOCTL(ops, VIDIOC_G_AUDIO, vidioc_g_audio); SET_VALID_IOCTL(ops, VIDIOC_S_AUDIO, vidioc_s_audio); SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_S_EDID, vidioc_s_edid); } if (is_tx) { if (is_io_mc) { __set_bit(_IOC_NR(VIDIOC_ENUMOUTPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_G_OUTPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_OUTPUT), valid_ioctls); } else { SET_VALID_IOCTL(ops, VIDIOC_ENUMOUTPUT, vidioc_enum_output); SET_VALID_IOCTL(ops, VIDIOC_G_OUTPUT, vidioc_g_output); SET_VALID_IOCTL(ops, VIDIOC_S_OUTPUT, vidioc_s_output); } SET_VALID_IOCTL(ops, VIDIOC_ENUMAUDOUT, vidioc_enumaudout); SET_VALID_IOCTL(ops, VIDIOC_G_AUDOUT, vidioc_g_audout); SET_VALID_IOCTL(ops, VIDIOC_S_AUDOUT, vidioc_s_audout); } if (ops->vidioc_g_parm || ops->vidioc_g_std) __set_bit(_IOC_NR(VIDIOC_G_PARM), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_S_PARM, vidioc_s_parm); SET_VALID_IOCTL(ops, VIDIOC_S_DV_TIMINGS, vidioc_s_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap); SET_VALID_IOCTL(ops, VIDIOC_G_EDID, vidioc_g_edid); } if (is_tx && (is_radio || is_sdr)) { /* radio transmitter only ioctls */ SET_VALID_IOCTL(ops, VIDIOC_G_MODULATOR, vidioc_g_modulator); SET_VALID_IOCTL(ops, VIDIOC_S_MODULATOR, vidioc_s_modulator); } if (is_rx && !is_tch) { /* receiver only ioctls */ SET_VALID_IOCTL(ops, VIDIOC_G_TUNER, vidioc_g_tuner); SET_VALID_IOCTL(ops, VIDIOC_S_TUNER, vidioc_s_tuner); SET_VALID_IOCTL(ops, VIDIOC_S_HW_FREQ_SEEK, vidioc_s_hw_freq_seek); } bitmap_andnot(vdev->valid_ioctls, valid_ioctls, vdev->valid_ioctls, BASE_VIDIOC_PRIVATE); } static int video_register_media_controller(struct video_device *vdev) { #if defined(CONFIG_MEDIA_CONTROLLER) u32 intf_type; int ret; /* Memory-to-memory devices are more complex and use * their own function to register its mc entities. */ if (!vdev->v4l2_dev->mdev || vdev->vfl_dir == VFL_DIR_M2M) return 0; vdev->entity.obj_type = MEDIA_ENTITY_TYPE_VIDEO_DEVICE; vdev->entity.function = MEDIA_ENT_F_UNKNOWN; switch (vdev->vfl_type) { case VFL_TYPE_VIDEO: intf_type = MEDIA_INTF_T_V4L_VIDEO; vdev->entity.function = MEDIA_ENT_F_IO_V4L; break; case VFL_TYPE_VBI: intf_type = MEDIA_INTF_T_V4L_VBI; vdev->entity.function = MEDIA_ENT_F_IO_VBI; break; case VFL_TYPE_SDR: intf_type = MEDIA_INTF_T_V4L_SWRADIO; vdev->entity.function = MEDIA_ENT_F_IO_SWRADIO; break; case VFL_TYPE_TOUCH: intf_type = MEDIA_INTF_T_V4L_TOUCH; vdev->entity.function = MEDIA_ENT_F_IO_V4L; break; case VFL_TYPE_RADIO: intf_type = MEDIA_INTF_T_V4L_RADIO; /* * Radio doesn't have an entity at the V4L2 side to represent * radio input or output. Instead, the audio input/output goes * via either physical wires or ALSA. */ break; case VFL_TYPE_SUBDEV: intf_type = MEDIA_INTF_T_V4L_SUBDEV; /* Entity will be created via v4l2_device_register_subdev() */ break; default: return 0; } if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) { vdev->entity.name = vdev->name; /* Needed just for backward compatibility with legacy MC API */ vdev->entity.info.dev.major = VIDEO_MAJOR; vdev->entity.info.dev.minor = vdev->minor; ret = media_device_register_entity(vdev->v4l2_dev->mdev, &vdev->entity); if (ret < 0) { pr_warn("%s: media_device_register_entity failed\n", __func__); return ret; } } vdev->intf_devnode = media_devnode_create(vdev->v4l2_dev->mdev, intf_type, 0, VIDEO_MAJOR, vdev->minor); if (!vdev->intf_devnode) { media_device_unregister_entity(&vdev->entity); return -ENOMEM; } if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) { struct media_link *link; link = media_create_intf_link(&vdev->entity, &vdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) { media_devnode_remove(vdev->intf_devnode); media_device_unregister_entity(&vdev->entity); return -ENOMEM; } } /* FIXME: how to create the other interface links? */ #endif return 0; } int __video_register_device(struct video_device *vdev, enum vfl_devnode_type type, int nr, int warn_if_nr_in_use, struct module *owner) { int i = 0; int ret; int minor_offset = 0; int minor_cnt = VIDEO_NUM_DEVICES; const char *name_base; /* A minor value of -1 marks this video device as never having been registered */ vdev->minor = -1; /* the release callback MUST be present */ if (WARN_ON(!vdev->release)) return -EINVAL; /* the v4l2_dev pointer MUST be present */ if (WARN_ON(!vdev->v4l2_dev)) return -EINVAL; /* the device_caps field MUST be set for all but subdevs */ if (WARN_ON(type != VFL_TYPE_SUBDEV && !vdev->device_caps)) return -EINVAL; /* v4l2_fh support */ spin_lock_init(&vdev->fh_lock); INIT_LIST_HEAD(&vdev->fh_list); /* Part 1: check device type */ switch (type) { case VFL_TYPE_VIDEO: name_base = "video"; break; case VFL_TYPE_VBI: name_base = "vbi"; break; case VFL_TYPE_RADIO: name_base = "radio"; break; case VFL_TYPE_SUBDEV: name_base = "v4l-subdev"; break; case VFL_TYPE_SDR: /* Use device name 'swradio' because 'sdr' was already taken. */ name_base = "swradio"; break; case VFL_TYPE_TOUCH: name_base = "v4l-touch"; break; default: pr_err("%s called with unknown type: %d\n", __func__, type); return -EINVAL; } vdev->vfl_type = type; vdev->cdev = NULL; if (vdev->dev_parent == NULL) vdev->dev_parent = vdev->v4l2_dev->dev; if (vdev->ctrl_handler == NULL) vdev->ctrl_handler = vdev->v4l2_dev->ctrl_handler; /* If the prio state pointer is NULL, then use the v4l2_device prio state. */ if (vdev->prio == NULL) vdev->prio = &vdev->v4l2_dev->prio; /* Part 2: find a free minor, device node number and device index. */ #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* Keep the ranges for the first four types for historical * reasons. * Newer devices (not yet in place) should use the range * of 128-191 and just pick the first free minor there * (new style). */ switch (type) { case VFL_TYPE_VIDEO: minor_offset = 0; minor_cnt = 64; break; case VFL_TYPE_RADIO: minor_offset = 64; minor_cnt = 64; break; case VFL_TYPE_VBI: minor_offset = 224; minor_cnt = 32; break; default: minor_offset = 128; minor_cnt = 64; break; } #endif /* Pick a device node number */ mutex_lock(&videodev_lock); nr = devnode_find(vdev, nr == -1 ? 0 : nr, minor_cnt); if (nr == minor_cnt) nr = devnode_find(vdev, 0, minor_cnt); if (nr == minor_cnt) { pr_err("could not get a free device node number\n"); mutex_unlock(&videodev_lock); return -ENFILE; } #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* 1-on-1 mapping of device node number to minor number */ i = nr; #else /* The device node number and minor numbers are independent, so we just find the first free minor number. */ for (i = 0; i < VIDEO_NUM_DEVICES; i++) if (video_devices[i] == NULL) break; if (i == VIDEO_NUM_DEVICES) { mutex_unlock(&videodev_lock); pr_err("could not get a free minor\n"); return -ENFILE; } #endif vdev->minor = i + minor_offset; vdev->num = nr; /* Should not happen since we thought this minor was free */ if (WARN_ON(video_devices[vdev->minor])) { mutex_unlock(&videodev_lock); pr_err("video_device not empty!\n"); return -ENFILE; } devnode_set(vdev); vdev->index = get_index(vdev); video_devices[vdev->minor] = vdev; mutex_unlock(&videodev_lock); if (vdev->ioctl_ops) determine_valid_ioctls(vdev); /* Part 3: Initialize the character device */ vdev->cdev = cdev_alloc(); if (vdev->cdev == NULL) { ret = -ENOMEM; goto cleanup; } vdev->cdev->ops = &v4l2_fops; vdev->cdev->owner = owner; ret = cdev_add(vdev->cdev, MKDEV(VIDEO_MAJOR, vdev->minor), 1); if (ret < 0) { pr_err("%s: cdev_add failed\n", __func__); kfree(vdev->cdev); vdev->cdev = NULL; goto cleanup; } /* Part 4: register the device with sysfs */ vdev->dev.class = &video_class; vdev->dev.devt = MKDEV(VIDEO_MAJOR, vdev->minor); vdev->dev.parent = vdev->dev_parent; dev_set_name(&vdev->dev, "%s%d", name_base, vdev->num); ret = device_register(&vdev->dev); if (ret < 0) { pr_err("%s: device_register failed\n", __func__); goto cleanup; } /* Register the release callback that will be called when the last reference to the device goes away. */ vdev->dev.release = v4l2_device_release; if (nr != -1 && nr != vdev->num && warn_if_nr_in_use) pr_warn("%s: requested %s%d, got %s\n", __func__, name_base, nr, video_device_node_name(vdev)); /* Increase v4l2_device refcount */ v4l2_device_get(vdev->v4l2_dev); /* Part 5: Register the entity. */ ret = video_register_media_controller(vdev); /* Part 6: Activate this minor. The char device can now be used. */ set_bit(V4L2_FL_REGISTERED, &vdev->flags); return 0; cleanup: mutex_lock(&videodev_lock); if (vdev->cdev) cdev_del(vdev->cdev); video_devices[vdev->minor] = NULL; devnode_clear(vdev); mutex_unlock(&videodev_lock); /* Mark this video device as never having been registered. */ vdev->minor = -1; return ret; } EXPORT_SYMBOL(__video_register_device); /** * video_unregister_device - unregister a video4linux device * @vdev: the device to unregister * * This unregisters the passed device. Future open calls will * be met with errors. */ void video_unregister_device(struct video_device *vdev) { /* Check if vdev was ever registered at all */ if (!vdev || !video_is_registered(vdev)) return; mutex_lock(&videodev_lock); /* This must be in a critical section to prevent a race with v4l2_open. * Once this bit has been cleared video_get may never be called again. */ clear_bit(V4L2_FL_REGISTERED, &vdev->flags); mutex_unlock(&videodev_lock); if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) v4l2_event_wake_all(vdev); device_unregister(&vdev->dev); } EXPORT_SYMBOL(video_unregister_device); #if defined(CONFIG_MEDIA_CONTROLLER) __must_check int video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return media_pipeline_start(&entity->pads[0], pipe); } EXPORT_SYMBOL_GPL(video_device_pipeline_start); __must_check int __video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return __media_pipeline_start(&entity->pads[0], pipe); } EXPORT_SYMBOL_GPL(__video_device_pipeline_start); void video_device_pipeline_stop(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return; return media_pipeline_stop(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline_stop); void __video_device_pipeline_stop(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return; return __media_pipeline_stop(&entity->pads[0]); } EXPORT_SYMBOL_GPL(__video_device_pipeline_stop); __must_check int video_device_pipeline_alloc_start(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return media_pipeline_alloc_start(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline_alloc_start); struct media_pipeline *video_device_pipeline(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return NULL; return media_pad_pipeline(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline); #endif /* CONFIG_MEDIA_CONTROLLER */ /* * Initialise video for linux */ static int __init videodev_init(void) { dev_t dev = MKDEV(VIDEO_MAJOR, 0); int ret; pr_info("Linux video capture interface: v2.00\n"); ret = register_chrdev_region(dev, VIDEO_NUM_DEVICES, VIDEO_NAME); if (ret < 0) { pr_warn("videodev: unable to get major %d\n", VIDEO_MAJOR); return ret; } ret = class_register(&video_class); if (ret < 0) { unregister_chrdev_region(dev, VIDEO_NUM_DEVICES); pr_warn("video_dev: class_register failed\n"); return -EIO; } return 0; } static void __exit videodev_exit(void) { dev_t dev = MKDEV(VIDEO_MAJOR, 0); class_unregister(&video_class); unregister_chrdev_region(dev, VIDEO_NUM_DEVICES); } subsys_initcall(videodev_init); module_exit(videodev_exit) MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@kernel.org>, Bill Dirks, Justin Schoeman, Gerd Knorr"); MODULE_DESCRIPTION("Video4Linux2 core driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(VIDEO_MAJOR); |
1 795 35 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner */ #include "bat_v.h" #include "main.h" #include <linux/atomic.h> #include <linux/cache.h> #include <linux/errno.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "originator.h" static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if) { batadv_v_elp_iface_activate(primary_if, hard_iface); batadv_hardif_put(primary_if); } /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can * set the interface as ACTIVE right away, without any risk of race * condition */ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) hard_iface->if_status = BATADV_IF_ACTIVE; } static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) { int ret; ret = batadv_v_elp_iface_enable(hard_iface); if (ret < 0) return ret; ret = batadv_v_ogm_iface_enable(hard_iface); if (ret < 0) batadv_v_elp_iface_disable(hard_iface); return ret; } static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) { batadv_v_ogm_iface_disable(hard_iface); batadv_v_elp_iface_disable(hard_iface); } static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) { batadv_v_elp_primary_iface_set(hard_iface); batadv_v_ogm_primary_iface_set(hard_iface); } /** * batadv_v_iface_update_mac() - react to hard-interface MAC address change * @hard_iface: the modified interface * * If the modified interface is the primary one, update the originator * address in the ELP and OGM messages to reflect the new MAC address. */ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if != hard_iface) goto out; batadv_v_primary_iface_set(hard_iface); out: batadv_hardif_put(primary_if); } static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); INIT_WORK(&hardif_neigh->bat_v.metric_work, batadv_v_elp_throughput_metric_update); } /** * batadv_v_neigh_dump_neigh() - Dump a neighbour into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @hardif_neigh: Neighbour to dump * * Return: Error code, or 0 on success */ static int batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_hardif_neigh_node *hardif_neigh) { void *hdr; unsigned int last_seen_msecs; u32 throughput; last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); throughput = throughput * 100; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, hardif_neigh->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, hardif_neigh->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hardif_neigh->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs) || nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_v_neigh_dump_hardif() - Dump the neighbours of a hard interface into * a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @hard_iface: The hard interface to be dumped * @idx_s: Entries to be skipped * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface, int *idx_s) { struct batadv_hardif_neigh_node *hardif_neigh; int idx = 0; hlist_for_each_entry_rcu(hardif_neigh, &hard_iface->neigh_list, list) { if (idx++ < *idx_s) continue; if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { *idx_s = idx - 1; return -EMSGSIZE; } } *idx_s = 0; return 0; } /** * batadv_v_neigh_dump() - Dump the neighbours of a hard interface into a * message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @single_hardif: Limit dumping to this hard interface */ static void batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *single_hardif) { struct batadv_hard_iface *hard_iface; int i_hardif = 0; int i_hardif_s = cb->args[0]; int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; rcu_read_lock(); if (single_hardif) { if (i_hardif_s == 0) { if (batadv_v_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, single_hardif, &idx) == 0) i_hardif++; } } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (i_hardif++ < i_hardif_s) continue; if (batadv_v_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, hard_iface, &idx)) { i_hardif--; break; } } } rcu_read_unlock(); cb->args[0] = i_hardif; cb->args[1] = idx; } /** * batadv_v_orig_dump_subentry() - Dump an originator subentry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour * @best: Is the best originator * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, bool best) { struct batadv_neigh_ifinfo *n_ifinfo; unsigned int last_seen_msecs; u32 throughput; void *hdr; n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); if (!n_ifinfo) return 0; throughput = n_ifinfo->bat_v.throughput * 100; batadv_neigh_ifinfo_put(n_ifinfo); last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); if (if_outgoing != BATADV_IF_DEFAULT && if_outgoing != neigh_node->if_incoming) return 0; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, neigh_node->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, neigh_node->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, neigh_node->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_v_orig_dump_entry() - Dump an originator entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, int *sub_s) { struct batadv_neigh_node *neigh_node_best; struct batadv_neigh_node *neigh_node; int sub = 0; bool best; neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); if (!neigh_node_best) goto out; hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { if (sub++ < *sub_s) continue; best = (neigh_node == neigh_node_best); if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, if_outgoing, orig_node, neigh_node, best)) { batadv_neigh_node_put(neigh_node_best); *sub_s = sub - 1; return -EMSGSIZE; } } out: batadv_neigh_node_put(neigh_node_best); *sub_s = 0; return 0; } /** * batadv_v_orig_dump_bucket() - Dump an originator bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped * @sub: Number of sub entries to be skipped * * Return: Error code, or 0 on success */ static int batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct hlist_head *head, int *idx_s, int *sub) { struct batadv_orig_node *orig_node; int idx = 0; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (idx++ < *idx_s) continue; if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, if_outgoing, orig_node, sub)) { rcu_read_unlock(); *idx_s = idx - 1; return -EMSGSIZE; } } rcu_read_unlock(); *idx_s = 0; *sub = 0; return 0; } /** * batadv_v_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; int bucket = cb->args[0]; int idx = cb->args[1]; int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; while (bucket < hash->size) { head = &hash->table[bucket]; if (batadv_v_orig_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, bat_priv, if_outgoing, head, &idx, &sub)) break; bucket++; } cb->args[0] = bucket; cb->args[1] = idx; cb->args[2] = sub; } static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; batadv_neigh_ifinfo_put(ifinfo2); err_ifinfo2: batadv_neigh_ifinfo_put(ifinfo1); err_ifinfo1: return ret; } static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; u32 threshold; bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; threshold = ifinfo1->bat_v.throughput - threshold; ret = ifinfo2->bat_v.throughput > threshold; batadv_neigh_ifinfo_put(ifinfo2); err_ifinfo2: batadv_neigh_ifinfo_put(ifinfo1); err_ifinfo1: return ret; } /** * batadv_v_init_sel_class() - initialize GW selection class * @bat_priv: the bat priv with all the soft interface information */ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) { /* set default throughput difference threshold to 5Mbps */ atomic_set(&bat_priv->gw.sel_class, 50); } /** * batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW * @gw_node: the GW to retrieve the metric for * @bw: the pointer where the metric will be stored. The metric is computed as * the minimum between the GW advertised throughput and the path throughput to * it in the mesh * * Return: 0 on success, -1 on failure */ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; int ret = -1; orig_node = gw_node->orig_node; router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) goto out; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto out; /* the GW metric is computed as the minimum between the path throughput * to reach the GW itself and the advertised bandwidth. * This gives us an approximation of the effective throughput that the * client can expect via this particular GW node */ *bw = router_ifinfo->bat_v.throughput; *bw = min_t(u32, *bw, gw_node->bandwidth_down); ret = 0; out: batadv_neigh_node_put(router); batadv_neigh_ifinfo_put(router_ifinfo); return ret; } /** * batadv_v_gw_get_best_gw_node() - retrieve the best GW node * @bat_priv: the bat priv with all the soft interface information * * Return: the GW node having the best GW-metric, NULL if no GW is known */ static struct batadv_gw_node * batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw = NULL; u32 max_bw = 0, bw; rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { if (!kref_get_unless_zero(&gw_node->refcount)) continue; if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) goto next; if (curr_gw && bw <= max_bw) goto next; batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); max_bw = bw; next: batadv_gw_node_put(gw_node); } rcu_read_unlock(); return curr_gw; } /** * batadv_v_gw_is_eligible() - check if a originator would be selected as GW * @bat_priv: the bat priv with all the soft interface information * @curr_gw_orig: originator representing the currently selected GW * @orig_node: the originator representing the new candidate * * Return: true if orig_node can be selected as current GW, false otherwise */ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node) { struct batadv_gw_node *curr_gw, *orig_gw = NULL; u32 gw_throughput, orig_throughput, threshold; bool ret = false; threshold = atomic_read(&bat_priv->gw.sel_class); curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); if (!curr_gw) { ret = true; goto out; } if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { ret = true; goto out; } orig_gw = batadv_gw_node_get(bat_priv, orig_node); if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) goto out; if (orig_throughput < gw_throughput) goto out; if ((orig_throughput - gw_throughput) < threshold) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", gw_throughput, orig_throughput); ret = true; out: batadv_gw_node_put(curr_gw); batadv_gw_node_put(orig_gw); return ret; } /** * batadv_v_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success */ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_gw_node *gw_node) { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; struct batadv_gw_node *curr_gw = NULL; int ret = 0; void *hdr; router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); if (!router) goto out; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto out; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); if (!hdr) { ret = -ENOBUFS; goto out; } genl_dump_check_consistent(cb, hdr); ret = -EMSGSIZE; if (curr_gw == gw_node) { if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { genlmsg_cancel(msg, hdr); goto out; } } if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, gw_node->orig_node->orig)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, router_ifinfo->bat_v.throughput)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, router->if_incoming->net_dev->name)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, router->if_incoming->net_dev->ifindex)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, gw_node->bandwidth_down)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { genlmsg_cancel(msg, hdr); goto out; } genlmsg_end(msg, hdr); ret = 0; out: batadv_gw_node_put(curr_gw); batadv_neigh_ifinfo_put(router_ifinfo); batadv_neigh_node_put(router); return ret; } /** * batadv_v_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information */ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) { int portid = NETLINK_CB(cb->skb).portid; struct batadv_gw_node *gw_node; int idx_skip = cb->args[0]; int idx = 0; spin_lock_bh(&bat_priv->gw.list_lock); cb->seq = bat_priv->gw.generation << 1 | 1; hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) { if (idx++ < idx_skip) continue; if (batadv_v_gw_dump_entry(msg, portid, cb, bat_priv, gw_node)) { idx_skip = idx - 1; goto unlock; } } idx_skip = idx; unlock: spin_unlock_bh(&bat_priv->gw.list_lock); cb->args[0] = idx_skip; } static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { .activate = batadv_v_iface_activate, .enable = batadv_v_iface_enable, .disable = batadv_v_iface_disable, .update_mac = batadv_v_iface_update_mac, .primary_set = batadv_v_primary_iface_set, }, .neigh = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, .dump = batadv_v_neigh_dump, }, .orig = { .dump = batadv_v_orig_dump, }, .gw = { .init_sel_class = batadv_v_init_sel_class, .sel_class_max = U32_MAX, .get_best_gw_node = batadv_v_gw_get_best_gw_node, .is_eligible = batadv_v_gw_is_eligible, .dump = batadv_v_gw_dump, }, }; /** * batadv_v_hardif_init() - initialize the algorithm specific fields in the * hard-interface object * @hard_iface: the hard-interface to initialize */ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) { /* enable link throughput auto-detection by setting the throughput * override to zero */ atomic_set(&hard_iface->bat_v.throughput_override, 0); atomic_set(&hard_iface->bat_v.elp_interval, 500); hard_iface->bat_v.aggr_len = 0; skb_queue_head_init(&hard_iface->bat_v.aggr_list); INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq, batadv_v_ogm_aggr_work); } /** * batadv_v_mesh_init() - initialize the B.A.T.M.A.N. V private resources for a * mesh * @bat_priv: the object representing the mesh interface to initialise * * Return: 0 on success or a negative error code otherwise */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { int ret = 0; ret = batadv_v_ogm_init(bat_priv); if (ret < 0) return ret; return 0; } /** * batadv_v_mesh_free() - free the B.A.T.M.A.N. V private resources for a mesh * @bat_priv: the object representing the mesh interface to free */ void batadv_v_mesh_free(struct batadv_priv *bat_priv) { batadv_v_ogm_free(bat_priv); } /** * batadv_v_init() - B.A.T.M.A.N. V initialization function * * Description: Takes care of initializing all the subcomponents. * It is invoked upon module load only. * * Return: 0 on success or a negative error code otherwise */ int __init batadv_v_init(void) { int ret; /* B.A.T.M.A.N. V echo location protocol packet */ ret = batadv_recv_handler_register(BATADV_ELP, batadv_v_elp_packet_recv); if (ret < 0) return ret; ret = batadv_recv_handler_register(BATADV_OGM2, batadv_v_ogm_packet_recv); if (ret < 0) goto elp_unregister; ret = batadv_algo_register(&batadv_batman_v); if (ret < 0) goto ogm_unregister; return ret; ogm_unregister: batadv_recv_handler_unregister(BATADV_OGM2); elp_unregister: batadv_recv_handler_unregister(BATADV_ELP); return ret; } |
4 1 1 1 4 11 3 1 2 1 1 8 5 5 3 2 6 3 3 10 10 3 1 1 5 2 14 14 11 2 3 14 10 2 14 10 2 4 14 5 11 2 3 2 2 36 35 18 18 39 1 2 36 1 33 2 33 36 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 | // SPDX-License-Identifier: GPL-2.0-only /* * Media device * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/compat.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/ioctl.h> #include <linux/media.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/usb.h> #include <linux/version.h> #include <media/media-device.h> #include <media/media-devnode.h> #include <media/media-entity.h> #include <media/media-request.h> #ifdef CONFIG_MEDIA_CONTROLLER /* * Legacy defines from linux/media.h. This is the only place we need this * so we just define it here. The media.h header doesn't expose it to the * kernel to prevent it from being used by drivers, but here (and only here!) * we need it to handle the legacy behavior. */ #define MEDIA_ENT_SUBTYPE_MASK 0x0000ffff #define MEDIA_ENT_T_DEVNODE_UNKNOWN (MEDIA_ENT_F_OLD_BASE | \ MEDIA_ENT_SUBTYPE_MASK) /* ----------------------------------------------------------------------------- * Userspace API */ static inline void __user *media_get_uptr(__u64 arg) { return (void __user *)(uintptr_t)arg; } static int media_device_open(struct file *filp) { return 0; } static int media_device_close(struct file *filp) { return 0; } static long media_device_get_info(struct media_device *dev, void *arg) { struct media_device_info *info = arg; memset(info, 0, sizeof(*info)); if (dev->driver_name[0]) strscpy(info->driver, dev->driver_name, sizeof(info->driver)); else strscpy(info->driver, dev->dev->driver->name, sizeof(info->driver)); strscpy(info->model, dev->model, sizeof(info->model)); strscpy(info->serial, dev->serial, sizeof(info->serial)); strscpy(info->bus_info, dev->bus_info, sizeof(info->bus_info)); info->media_version = LINUX_VERSION_CODE; info->driver_version = info->media_version; info->hw_revision = dev->hw_revision; return 0; } static struct media_entity *find_entity(struct media_device *mdev, u32 id) { struct media_entity *entity; int next = id & MEDIA_ENT_ID_FLAG_NEXT; id &= ~MEDIA_ENT_ID_FLAG_NEXT; media_device_for_each_entity(entity, mdev) { if (((media_entity_id(entity) == id) && !next) || ((media_entity_id(entity) > id) && next)) { return entity; } } return NULL; } static long media_device_enum_entities(struct media_device *mdev, void *arg) { struct media_entity_desc *entd = arg; struct media_entity *ent; ent = find_entity(mdev, entd->id); if (ent == NULL) return -EINVAL; memset(entd, 0, sizeof(*entd)); entd->id = media_entity_id(ent); if (ent->name) strscpy(entd->name, ent->name, sizeof(entd->name)); entd->type = ent->function; entd->revision = 0; /* Unused */ entd->flags = ent->flags; entd->group_id = 0; /* Unused */ entd->pads = ent->num_pads; entd->links = ent->num_links - ent->num_backlinks; /* * Workaround for a bug at media-ctl <= v1.10 that makes it to * do the wrong thing if the entity function doesn't belong to * either MEDIA_ENT_F_OLD_BASE or MEDIA_ENT_F_OLD_SUBDEV_BASE * Ranges. * * Non-subdevices are expected to be at the MEDIA_ENT_F_OLD_BASE, * or, otherwise, will be silently ignored by media-ctl when * printing the graphviz diagram. So, map them into the devnode * old range. */ if (ent->function < MEDIA_ENT_F_OLD_BASE || ent->function > MEDIA_ENT_F_TUNER) { if (is_media_entity_v4l2_subdev(ent)) entd->type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN; else if (ent->function != MEDIA_ENT_F_IO_V4L) entd->type = MEDIA_ENT_T_DEVNODE_UNKNOWN; } memcpy(&entd->raw, &ent->info, sizeof(ent->info)); return 0; } static void media_device_kpad_to_upad(const struct media_pad *kpad, struct media_pad_desc *upad) { upad->entity = media_entity_id(kpad->entity); upad->index = kpad->index; upad->flags = kpad->flags; } static long media_device_enum_links(struct media_device *mdev, void *arg) { struct media_links_enum *links = arg; struct media_entity *entity; entity = find_entity(mdev, links->entity); if (entity == NULL) return -EINVAL; if (links->pads) { unsigned int p; for (p = 0; p < entity->num_pads; p++) { struct media_pad_desc pad; memset(&pad, 0, sizeof(pad)); media_device_kpad_to_upad(&entity->pads[p], &pad); if (copy_to_user(&links->pads[p], &pad, sizeof(pad))) return -EFAULT; } } if (links->links) { struct media_link *link; struct media_link_desc __user *ulink_desc = links->links; list_for_each_entry(link, &entity->links, list) { struct media_link_desc klink_desc; /* Ignore backlinks. */ if (link->source->entity != entity) continue; memset(&klink_desc, 0, sizeof(klink_desc)); media_device_kpad_to_upad(link->source, &klink_desc.source); media_device_kpad_to_upad(link->sink, &klink_desc.sink); klink_desc.flags = link->flags; if (copy_to_user(ulink_desc, &klink_desc, sizeof(*ulink_desc))) return -EFAULT; ulink_desc++; } } memset(links->reserved, 0, sizeof(links->reserved)); return 0; } static long media_device_setup_link(struct media_device *mdev, void *arg) { struct media_link_desc *linkd = arg; struct media_link *link = NULL; struct media_entity *source; struct media_entity *sink; /* Find the source and sink entities and link. */ source = find_entity(mdev, linkd->source.entity); sink = find_entity(mdev, linkd->sink.entity); if (source == NULL || sink == NULL) return -EINVAL; if (linkd->source.index >= source->num_pads || linkd->sink.index >= sink->num_pads) return -EINVAL; link = media_entity_find_link(&source->pads[linkd->source.index], &sink->pads[linkd->sink.index]); if (link == NULL) return -EINVAL; memset(linkd->reserved, 0, sizeof(linkd->reserved)); /* Setup the link on both entities. */ return __media_entity_setup_link(link, linkd->flags); } static long media_device_get_topology(struct media_device *mdev, void *arg) { struct media_v2_topology *topo = arg; struct media_entity *entity; struct media_interface *intf; struct media_pad *pad; struct media_link *link; struct media_v2_entity kentity, __user *uentity; struct media_v2_interface kintf, __user *uintf; struct media_v2_pad kpad, __user *upad; struct media_v2_link klink, __user *ulink; unsigned int i; int ret = 0; topo->topology_version = mdev->topology_version; /* Get entities and number of entities */ i = 0; uentity = media_get_uptr(topo->ptr_entities); media_device_for_each_entity(entity, mdev) { i++; if (ret || !uentity) continue; if (i > topo->num_entities) { ret = -ENOSPC; continue; } /* Copy fields to userspace struct if not error */ memset(&kentity, 0, sizeof(kentity)); kentity.id = entity->graph_obj.id; kentity.function = entity->function; kentity.flags = entity->flags; strscpy(kentity.name, entity->name, sizeof(kentity.name)); if (copy_to_user(uentity, &kentity, sizeof(kentity))) ret = -EFAULT; uentity++; } topo->num_entities = i; topo->reserved1 = 0; /* Get interfaces and number of interfaces */ i = 0; uintf = media_get_uptr(topo->ptr_interfaces); media_device_for_each_intf(intf, mdev) { i++; if (ret || !uintf) continue; if (i > topo->num_interfaces) { ret = -ENOSPC; continue; } memset(&kintf, 0, sizeof(kintf)); /* Copy intf fields to userspace struct */ kintf.id = intf->graph_obj.id; kintf.intf_type = intf->type; kintf.flags = intf->flags; if (media_type(&intf->graph_obj) == MEDIA_GRAPH_INTF_DEVNODE) { struct media_intf_devnode *devnode; devnode = intf_to_devnode(intf); kintf.devnode.major = devnode->major; kintf.devnode.minor = devnode->minor; } if (copy_to_user(uintf, &kintf, sizeof(kintf))) ret = -EFAULT; uintf++; } topo->num_interfaces = i; topo->reserved2 = 0; /* Get pads and number of pads */ i = 0; upad = media_get_uptr(topo->ptr_pads); media_device_for_each_pad(pad, mdev) { i++; if (ret || !upad) continue; if (i > topo->num_pads) { ret = -ENOSPC; continue; } memset(&kpad, 0, sizeof(kpad)); /* Copy pad fields to userspace struct */ kpad.id = pad->graph_obj.id; kpad.entity_id = pad->entity->graph_obj.id; kpad.flags = pad->flags; kpad.index = pad->index; if (copy_to_user(upad, &kpad, sizeof(kpad))) ret = -EFAULT; upad++; } topo->num_pads = i; topo->reserved3 = 0; /* Get links and number of links */ i = 0; ulink = media_get_uptr(topo->ptr_links); media_device_for_each_link(link, mdev) { if (link->is_backlink) continue; i++; if (ret || !ulink) continue; if (i > topo->num_links) { ret = -ENOSPC; continue; } memset(&klink, 0, sizeof(klink)); /* Copy link fields to userspace struct */ klink.id = link->graph_obj.id; klink.source_id = link->gobj0->id; klink.sink_id = link->gobj1->id; klink.flags = link->flags; if (copy_to_user(ulink, &klink, sizeof(klink))) ret = -EFAULT; ulink++; } topo->num_links = i; topo->reserved4 = 0; return ret; } static long media_device_request_alloc(struct media_device *mdev, void *arg) { int *alloc_fd = arg; if (!mdev->ops || !mdev->ops->req_validate || !mdev->ops->req_queue) return -ENOTTY; return media_request_alloc(mdev, alloc_fd); } static long copy_arg_from_user(void *karg, void __user *uarg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_WRITE) && copy_from_user(karg, uarg, _IOC_SIZE(cmd))) return -EFAULT; return 0; } static long copy_arg_to_user(void __user *uarg, void *karg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_READ) && copy_to_user(uarg, karg, _IOC_SIZE(cmd))) return -EFAULT; return 0; } /* Do acquire the graph mutex */ #define MEDIA_IOC_FL_GRAPH_MUTEX BIT(0) #define MEDIA_IOC_ARG(__cmd, func, fl, from_user, to_user) \ [_IOC_NR(MEDIA_IOC_##__cmd)] = { \ .cmd = MEDIA_IOC_##__cmd, \ .fn = func, \ .flags = fl, \ .arg_from_user = from_user, \ .arg_to_user = to_user, \ } #define MEDIA_IOC(__cmd, func, fl) \ MEDIA_IOC_ARG(__cmd, func, fl, copy_arg_from_user, copy_arg_to_user) /* the table is indexed by _IOC_NR(cmd) */ struct media_ioctl_info { unsigned int cmd; unsigned short flags; long (*fn)(struct media_device *dev, void *arg); long (*arg_from_user)(void *karg, void __user *uarg, unsigned int cmd); long (*arg_to_user)(void __user *uarg, void *karg, unsigned int cmd); }; static const struct media_ioctl_info ioctl_info[] = { MEDIA_IOC(DEVICE_INFO, media_device_get_info, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_ENTITIES, media_device_enum_entities, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_LINKS, media_device_enum_links, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(SETUP_LINK, media_device_setup_link, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(G_TOPOLOGY, media_device_get_topology, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(REQUEST_ALLOC, media_device_request_alloc, 0), }; static long media_device_ioctl(struct file *filp, unsigned int cmd, unsigned long __arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; const struct media_ioctl_info *info; void __user *arg = (void __user *)__arg; char __karg[256], *karg = __karg; long ret; if (_IOC_NR(cmd) >= ARRAY_SIZE(ioctl_info) || ioctl_info[_IOC_NR(cmd)].cmd != cmd) return -ENOIOCTLCMD; info = &ioctl_info[_IOC_NR(cmd)]; if (_IOC_SIZE(info->cmd) > sizeof(__karg)) { karg = kmalloc(_IOC_SIZE(info->cmd), GFP_KERNEL); if (!karg) return -ENOMEM; } if (info->arg_from_user) { ret = info->arg_from_user(karg, arg, cmd); if (ret) goto out_free; } if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_lock(&dev->graph_mutex); ret = info->fn(dev, karg); if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_unlock(&dev->graph_mutex); if (!ret && info->arg_to_user) ret = info->arg_to_user(arg, karg, cmd); out_free: if (karg != __karg) kfree(karg); return ret; } #ifdef CONFIG_COMPAT struct media_links_enum32 { __u32 entity; compat_uptr_t pads; /* struct media_pad_desc * */ compat_uptr_t links; /* struct media_link_desc * */ __u32 reserved[4]; }; static long media_device_enum_links32(struct media_device *mdev, struct media_links_enum32 __user *ulinks) { struct media_links_enum links; compat_uptr_t pads_ptr, links_ptr; int ret; memset(&links, 0, sizeof(links)); if (get_user(links.entity, &ulinks->entity) || get_user(pads_ptr, &ulinks->pads) || get_user(links_ptr, &ulinks->links)) return -EFAULT; links.pads = compat_ptr(pads_ptr); links.links = compat_ptr(links_ptr); ret = media_device_enum_links(mdev, &links); if (ret) return ret; if (copy_to_user(ulinks->reserved, links.reserved, sizeof(ulinks->reserved))) return -EFAULT; return 0; } #define MEDIA_IOC_ENUM_LINKS32 _IOWR('|', 0x02, struct media_links_enum32) static long media_device_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; long ret; switch (cmd) { case MEDIA_IOC_ENUM_LINKS32: mutex_lock(&dev->graph_mutex); ret = media_device_enum_links32(dev, (struct media_links_enum32 __user *)arg); mutex_unlock(&dev->graph_mutex); break; default: return media_device_ioctl(filp, cmd, arg); } return ret; } #endif /* CONFIG_COMPAT */ static const struct media_file_operations media_device_fops = { .owner = THIS_MODULE, .open = media_device_open, .ioctl = media_device_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = media_device_compat_ioctl, #endif /* CONFIG_COMPAT */ .release = media_device_close, }; /* ----------------------------------------------------------------------------- * sysfs */ static ssize_t model_show(struct device *cd, struct device_attribute *attr, char *buf) { struct media_devnode *devnode = to_media_devnode(cd); struct media_device *mdev = devnode->media_dev; return sprintf(buf, "%.*s\n", (int)sizeof(mdev->model), mdev->model); } static DEVICE_ATTR_RO(model); /* ----------------------------------------------------------------------------- * Registration/unregistration */ static void media_device_release(struct media_devnode *devnode) { dev_dbg(devnode->parent, "Media device released\n"); } static void __media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; struct media_link *link, *tmp; struct media_interface *intf; struct media_pad *iter; ida_free(&mdev->entity_internal_idx, entity->internal_idx); /* Remove all interface links pointing to this entity */ list_for_each_entry(intf, &mdev->interfaces, graph_obj.list) { list_for_each_entry_safe(link, tmp, &intf->links, list) { if (link->entity == entity) __media_remove_intf_link(link); } } /* Remove all data links that belong to this entity */ __media_entity_remove_links(entity); /* Remove all pads that belong to this entity */ media_entity_for_each_pad(entity, iter) media_gobj_destroy(&iter->graph_obj); /* Remove the entity */ media_gobj_destroy(&entity->graph_obj); /* invoke entity_notify callbacks to handle entity removal?? */ } int __must_check media_device_register_entity(struct media_device *mdev, struct media_entity *entity) { struct media_entity_notify *notify, *next; struct media_pad *iter; int ret; if (entity->function == MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN || entity->function == MEDIA_ENT_F_UNKNOWN) dev_warn(mdev->dev, "Entity type for entity %s was not initialized!\n", entity->name); /* Warn if we apparently re-register an entity */ WARN_ON(entity->graph_obj.mdev != NULL); entity->graph_obj.mdev = mdev; INIT_LIST_HEAD(&entity->links); entity->num_links = 0; entity->num_backlinks = 0; ret = ida_alloc_min(&mdev->entity_internal_idx, 1, GFP_KERNEL); if (ret < 0) return ret; entity->internal_idx = ret; mutex_lock(&mdev->graph_mutex); mdev->entity_internal_idx_max = max(mdev->entity_internal_idx_max, entity->internal_idx); /* Initialize media_gobj embedded at the entity */ media_gobj_create(mdev, MEDIA_GRAPH_ENTITY, &entity->graph_obj); /* Initialize objects at the pads */ media_entity_for_each_pad(entity, iter) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &iter->graph_obj); /* invoke entity_notify callbacks */ list_for_each_entry_safe(notify, next, &mdev->entity_notify, list) notify->notify(entity, notify->notify_data); if (mdev->entity_internal_idx_max >= mdev->pm_count_walk.ent_enum.idx_max) { struct media_graph new = { .top = 0 }; /* * Initialise the new graph walk before cleaning up * the old one in order not to spoil the graph walk * object of the media device if graph walk init fails. */ ret = media_graph_walk_init(&new, mdev); if (ret) { __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); return ret; } media_graph_walk_cleanup(&mdev->pm_count_walk); mdev->pm_count_walk = new; } mutex_unlock(&mdev->graph_mutex); return 0; } EXPORT_SYMBOL_GPL(media_device_register_entity); void media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity); void media_device_init(struct media_device *mdev) { INIT_LIST_HEAD(&mdev->entities); INIT_LIST_HEAD(&mdev->interfaces); INIT_LIST_HEAD(&mdev->pads); INIT_LIST_HEAD(&mdev->links); INIT_LIST_HEAD(&mdev->entity_notify); mutex_init(&mdev->req_queue_mutex); mutex_init(&mdev->graph_mutex); ida_init(&mdev->entity_internal_idx); atomic_set(&mdev->request_id, 0); if (!*mdev->bus_info) media_set_bus_info(mdev->bus_info, sizeof(mdev->bus_info), mdev->dev); dev_dbg(mdev->dev, "Media device initialized\n"); } EXPORT_SYMBOL_GPL(media_device_init); void media_device_cleanup(struct media_device *mdev) { ida_destroy(&mdev->entity_internal_idx); mdev->entity_internal_idx_max = 0; media_graph_walk_cleanup(&mdev->pm_count_walk); mutex_destroy(&mdev->graph_mutex); mutex_destroy(&mdev->req_queue_mutex); } EXPORT_SYMBOL_GPL(media_device_cleanup); int __must_check __media_device_register(struct media_device *mdev, struct module *owner) { struct media_devnode *devnode; int ret; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return -ENOMEM; /* Register the device node. */ mdev->devnode = devnode; devnode->fops = &media_device_fops; devnode->parent = mdev->dev; devnode->release = media_device_release; /* Set version 0 to indicate user-space that the graph is static */ mdev->topology_version = 0; ret = media_devnode_register(mdev, devnode, owner); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; return ret; } ret = device_create_file(&devnode->dev, &dev_attr_model); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; media_devnode_unregister_prepare(devnode); media_devnode_unregister(devnode); return ret; } dev_dbg(mdev->dev, "Media device registered\n"); return 0; } EXPORT_SYMBOL_GPL(__media_device_register); void media_device_register_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); list_add_tail(&nptr->list, &mdev->entity_notify); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_register_entity_notify); /* * Note: Should be called with mdev->lock held. */ static void __media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { list_del(&nptr->list); } void media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity_notify(mdev, nptr); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity_notify); void media_device_unregister(struct media_device *mdev) { struct media_entity *entity; struct media_entity *next; struct media_interface *intf, *tmp_intf; struct media_entity_notify *notify, *nextp; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); /* Check if mdev was ever registered at all */ if (!media_devnode_is_registered(mdev->devnode)) { mutex_unlock(&mdev->graph_mutex); return; } /* Clear the devnode register bit to avoid races with media dev open */ media_devnode_unregister_prepare(mdev->devnode); /* Remove all entities from the media device */ list_for_each_entry_safe(entity, next, &mdev->entities, graph_obj.list) __media_device_unregister_entity(entity); /* Remove all entity_notify callbacks from the media device */ list_for_each_entry_safe(notify, nextp, &mdev->entity_notify, list) __media_device_unregister_entity_notify(mdev, notify); /* Remove all interfaces from the media device */ list_for_each_entry_safe(intf, tmp_intf, &mdev->interfaces, graph_obj.list) { /* * Unlink the interface, but don't free it here; the * module which created it is responsible for freeing * it */ __media_remove_intf_links(intf); media_gobj_destroy(&intf->graph_obj); } mutex_unlock(&mdev->graph_mutex); dev_dbg(mdev->dev, "Media device unregistered\n"); device_remove_file(&mdev->devnode->dev, &dev_attr_model); media_devnode_unregister(mdev->devnode); /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; } EXPORT_SYMBOL_GPL(media_device_unregister); #if IS_ENABLED(CONFIG_PCI) void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, const char *name) { mdev->dev = &pci_dev->dev; if (name) strscpy(mdev->model, name, sizeof(mdev->model)); else strscpy(mdev->model, pci_name(pci_dev), sizeof(mdev->model)); sprintf(mdev->bus_info, "PCI:%s", pci_name(pci_dev)); mdev->hw_revision = (pci_dev->subsystem_vendor << 16) | pci_dev->subsystem_device; media_device_init(mdev); } EXPORT_SYMBOL_GPL(media_device_pci_init); #endif #if IS_ENABLED(CONFIG_USB) void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, const char *board_name, const char *driver_name) { mdev->dev = &udev->dev; if (driver_name) strscpy(mdev->driver_name, driver_name, sizeof(mdev->driver_name)); if (board_name) strscpy(mdev->model, board_name, sizeof(mdev->model)); else if (udev->product) strscpy(mdev->model, udev->product, sizeof(mdev->model)); else strscpy(mdev->model, "unknown model", sizeof(mdev->model)); if (udev->serial) strscpy(mdev->serial, udev->serial, sizeof(mdev->serial)); usb_make_path(udev, mdev->bus_info, sizeof(mdev->bus_info)); mdev->hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); media_device_init(mdev); } EXPORT_SYMBOL_GPL(__media_device_usb_init); #endif #endif /* CONFIG_MEDIA_CONTROLLER */ |
14 14 14 14 7 7 4 3 3 9 9 2 4 1 2 2 4 2 4 14 14 2 1 2 7 2 9 3 6 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel). * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/sch_generic.h> #include <net/tc_wrapper.h> #define HTSIZE 256 struct fw_head { u32 mask; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; struct fw_filter { struct fw_filter __rcu *next; u32 id; struct tcf_result res; int ifindex; struct tcf_exts exts; struct tcf_proto *tp; struct rcu_work rwork; }; static u32 fw_hash(u32 handle) { handle ^= (handle >> 16); handle ^= (handle >> 8); return handle % HTSIZE; } TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; int r; u32 id = skb->mark; if (head != NULL) { id &= head->mask; for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f; f = rcu_dereference_bh(f->next)) { if (f->id == id) { *res = f->res; if (!tcf_match_indev(skb, f->ifindex)) continue; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } } } else { struct Qdisc *q = tcf_block_q(tp->chain->block); /* Old method: classify the packet using its skb mark. */ if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; res->class = 0; return 0; } } return -1; } static void *fw_get(struct tcf_proto *tp, u32 handle) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; if (head == NULL) return NULL; f = rtnl_dereference(head->ht[fw_hash(handle)]); for (; f; f = rtnl_dereference(f->next)) { if (f->id == handle) return f; } return NULL; } static int fw_init(struct tcf_proto *tp) { /* We don't allocate fw_head here, because in the old method * we don't need it at all. */ return 0; } static void __fw_delete_filter(struct fw_filter *f) { tcf_exts_destroy(&f->exts); tcf_exts_put_net(&f->exts); kfree(f); } static void fw_delete_filter_work(struct work_struct *work) { struct fw_filter *f = container_of(to_rcu_work(work), struct fw_filter, rwork); rtnl_lock(); __fw_delete_filter(f); rtnl_unlock(); } static void fw_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; int h; if (head == NULL) return; for (h = 0; h < HTSIZE; h++) { while ((f = rtnl_dereference(head->ht[h])) != NULL) { RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, fw_delete_filter_work); else __fw_delete_filter(f); } } kfree_rcu(head, rcu); } static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; struct fw_filter __rcu **fp; struct fw_filter *pfp; int ret = -EINVAL; int h; if (head == NULL || f == NULL) goto out; fp = &head->ht[fw_hash(f->id)]; for (pfp = rtnl_dereference(*fp); pfp; fp = &pfp->next, pfp = rtnl_dereference(*fp)) { if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, fw_delete_filter_work); ret = 0; break; } } *last = true; for (h = 0; h < HTSIZE; h++) { if (rcu_access_pointer(head->ht[h])) { *last = false; break; } } out: return ret; } static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { [TCA_FW_CLASSID] = { .type = NLA_U32 }, [TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, [TCA_FW_MASK] = { .type = NLA_U32 }, }; static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base, u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags, extack); if (err < 0) return err; if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); if (ret < 0) return ret; f->ifindex = ret; } err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) return err; } else if (head->mask != 0xFFFFFFFF) return err; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); } return 0; } static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; int err; if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, NULL); if (err < 0) return err; if (f) { struct fw_filter *pfp, *fnew; struct fw_filter __rcu **fp; if (f->id != handle && handle) return -EINVAL; fnew = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); if (!fnew) return -ENOBUFS; fnew->id = f->id; fnew->ifindex = f->ifindex; fnew->tp = f->tp; err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) { kfree(fnew); return err; } err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } fp = &head->ht[fw_hash(fnew->id)]; for (pfp = rtnl_dereference(*fp); pfp; fp = &pfp->next, pfp = rtnl_dereference(*fp)) if (pfp == f) break; RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, fw_delete_filter_work); *arg = fnew; return err; } if (!handle) return -EINVAL; if (!head) { u32 mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) mask = nla_get_u32(tb[TCA_FW_MASK]); head = kzalloc(sizeof(*head), GFP_KERNEL); if (!head) return -ENOBUFS; head->mask = mask; rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); if (f == NULL) return -ENOBUFS; err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) goto errout; f->id = handle; f->tp = tp; err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack); if (err < 0) goto errout; RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]); rcu_assign_pointer(head->ht[fw_hash(handle)], f); *arg = f; return 0; errout: tcf_exts_destroy(&f->exts); kfree(f); return err; } static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); int h; if (head == NULL) arg->stop = 1; if (arg->stop) return; for (h = 0; h < HTSIZE; h++) { struct fw_filter *f; for (f = rtnl_dereference(head->ht[h]); f; f = rtnl_dereference(f->next)) { if (!tc_cls_stats_dump(tp, arg, f)) return; } } } static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = fh; struct nlattr *nest; if (f == NULL) return skb->len; t->tcm_handle = f->id; if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (f->res.classid && nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid)) goto nla_put_failure; if (f->ifindex) { struct net_device *dev; dev = __dev_get_by_index(net, f->ifindex); if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name)) goto nla_put_failure; } if (head->mask != 0xFFFFFFFF && nla_put_u32(skb, TCA_FW_MASK, head->mask)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q, unsigned long base) { struct fw_filter *f = fh; tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops cls_fw_ops __read_mostly = { .kind = "fw", .classify = fw_classify, .init = fw_init, .destroy = fw_destroy, .get = fw_get, .change = fw_change, .delete = fw_delete, .walk = fw_walk, .dump = fw_dump, .bind_class = fw_bind_class, .owner = THIS_MODULE, }; static int __init init_fw(void) { return register_tcf_proto_ops(&cls_fw_ops); } static void __exit exit_fw(void) { unregister_tcf_proto_ops(&cls_fw_ops); } module_init(init_fw) module_exit(exit_fw) MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); |
32 47 3 3 3 3 3 106 82 433 389 232 158 207 207 1051 1262 1259 17 527 743 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP router. * * Version: @(#)route.h 1.0.4 05/27/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Fixes: * Alan Cox : Reformatted. Added ip_rt_local() * Alan Cox : Support for TCP parameters. * Alexey Kuznetsov: Major changes for new routing code. * Mike McLagan : Routing by source * Robert Olsson : Added rt_cache statistics */ #ifndef _ROUTE_H #define _ROUTE_H #include <net/dst.h> #include <net/inetpeer.h> #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> #include <net/arp.h> #include <net/ndisc.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> #include <linux/route.h> #include <linux/ip.h> #include <linux/cache.h> #include <linux/security.h> #define RTO_ONLINK 0x01 #define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) return RT_SCOPE_LINK; return RT_SCOPE_UNIVERSE; } static inline __u8 ip_sock_rt_tos(const struct sock *sk) { return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); } struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; struct rtable { struct dst_entry dst; int rt_genid; unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; __u8 rt_uses_gateway; int rt_iif; u8 rt_gw_family; /* Info on neighbour */ union { __be32 rt_gw4; struct in6_addr rt_gw6; }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, rt_pmtu:31; }; static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_is_input != 0; } static inline bool rt_is_output_route(const struct rtable *rt) { return rt->rt_is_input == 0; } static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { if (rt->rt_gw_family == AF_INET) return rt->rt_gw4; return daddr; } struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; __u32 i_bytes; __u32 i_packets; }; struct rt_cache_stat { unsigned int in_slow_tot; unsigned int in_slow_mc; unsigned int in_no_route; unsigned int in_brd; unsigned int in_martian_dst; unsigned int in_martian_src; unsigned int out_slow_tot; unsigned int out_slow_mc; }; extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, const struct sk_buff *skb); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, struct fib_result *res, const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp) { return ip_route_output_flow(net, flp, NULL); } static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, __be32 saddr, u8 tos, int oif) { struct flowi4 fl4 = { .flowi4_oif = oif, .flowi4_tos = tos, .daddr = daddr, .saddr = saddr, }; return ip_route_output_key(net, &fl4); } static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4, const struct sock *sk, __be32 daddr, __be32 saddr, __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4, __be32 daddr, __be32 saddr, __be32 gre_key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_tos = tos; fl4->flowi4_proto = IPPROTO_GRE; fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { int err; rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) err = -EINVAL; } rcu_read_unlock(); return err; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); unsigned int inet_addr_type(struct net *net, __be32 addr); unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool noxfrm); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); void rt_add_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. * We rely on dst being first structure in struct rtable */ BUILD_BUG_ON(offsetof(struct rtable, dst) != 0); dst_release(&rt->dst); } #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) { return ip_tos2prio[IPTOS_TOS(tos)>>1]; } /* ip_route_connect() and ip_route_newports() work in tandem whilst * binding a socket for a new outgoing connection. * * In order to use IPSEC properly, we must, in the end, have a * route that was looked up using all available keys including source * and destination ports. * * However, if a source port needs to be allocated (the user specified * a wildcard source port) we need to obtain addressing information * in order to perform that allocation. * * So ip_route_connect() looks up a route using wildcarded source and * destination ports in the key, simply so that we can get a pair of * addresses to use for port allocation. * * Later, once the ports are allocated, ip_route_newports() will make * another route lookup if needed to make sure we catch any IPSEC * rules keyed on the port information. * * The callers allocate the flow key on their stack, and must pass in * the same flowi4 object to both the ip_route_connect() and the * ip_route_newports() calls. */ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { __u8 flow_flags = 0; if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt, __be16 orig_sport, __be16 orig_dport, __be16 sport, __be16 dport, const struct sock *sk) { if (sport != orig_sport || dport != orig_dport) { fl4->fl4_dport = dport; fl4->fl4_sport = sport; ip_rt_put(rt); flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr, fl4->saddr); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; } static inline int inet_iif(const struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); if (rt && rt->rt_iif) return rt->rt_iif; return skb->skb_iif; } static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); struct net *net = dev_net(dst->dev); if (hoplimit == 0) hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, __be32 daddr) { struct neighbour *neigh; neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); return neigh; } static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, struct sk_buff *skb, bool *is_v6gw) { struct net_device *dev = rt->dst.dev; struct neighbour *neigh; if (likely(rt->rt_gw_family == AF_INET)) { neigh = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { neigh = ip_neigh_gw6(dev, &rt->rt_gw6); *is_v6gw = true; } else { neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); } return neigh; } #endif /* _ROUTE_H */ |
30 1 48 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Definitions for the SMC module (socket related) * * Copyright IBM Corp. 2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #ifndef __SMC_H #define __SMC_H #include <linux/socket.h> #include <linux/types.h> #include <linux/compiler.h> /* __aligned */ #include <net/genetlink.h> #include <net/sock.h> #include "smc_ib.h" #define SMC_V1 1 /* SMC version V1 */ #define SMC_V2 2 /* SMC version V2 */ #define SMC_RELEASE_0 0 #define SMC_RELEASE_1 1 #define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; extern struct proto smc_proto6; #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif enum smc_state { /* possible states of an SMC socket */ SMC_ACTIVE = 1, SMC_INIT = 2, SMC_CLOSED = 7, SMC_LISTEN = 10, /* normal close */ SMC_PEERCLOSEWAIT1 = 20, SMC_PEERCLOSEWAIT2 = 21, SMC_APPFINCLOSEWAIT = 24, SMC_APPCLOSEWAIT1 = 22, SMC_APPCLOSEWAIT2 = 23, SMC_PEERFINCLOSEWAIT = 25, /* abnormal close */ SMC_PEERABORTWAIT = 26, SMC_PROCESSABORT = 27, }; enum smc_supplemental_features { SMC_SPF_VIRT_ISM_DEV = 0, }; #define SMC_FEATURE_MASK \ (BIT(SMC_SPF_VIRT_ISM_DEV)) struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ union { u8 type; #if defined(__BIG_ENDIAN_BITFIELD) struct { u8 llc_version:4, llc_type:4; }; #elif defined(__LITTLE_ENDIAN_BITFIELD) struct { u8 llc_type:4, llc_version:4; }; #endif }; } __aligned(1); struct smc_cdc_conn_state_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 peer_done_writing : 1; /* Sending done indicator */ u8 peer_conn_closed : 1; /* Peer connection closed indicator */ u8 peer_conn_abort : 1; /* Abnormal close indicator */ u8 reserved : 5; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 5; u8 peer_conn_abort : 1; u8 peer_conn_closed : 1; u8 peer_done_writing : 1; #endif }; struct smc_cdc_producer_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 write_blocked : 1; /* Writing Blocked, no rx buf space */ u8 urg_data_pending : 1; /* Urgent Data Pending */ u8 urg_data_present : 1; /* Urgent Data Present */ u8 cons_curs_upd_req : 1; /* cursor update requested */ u8 failover_validation : 1;/* message replay due to failover */ u8 reserved : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 3; u8 failover_validation : 1; u8 cons_curs_upd_req : 1; u8 urg_data_present : 1; u8 urg_data_pending : 1; u8 write_blocked : 1; #endif }; /* in host byte order */ union smc_host_cursor { /* SMC cursor - an offset in an RMBE */ struct { u16 reserved; u16 wrap; /* window wrap sequence number */ u32 count; /* cursor (= offset) part */ }; #ifdef KERNEL_HAS_ATOMIC64 atomic64_t acurs; /* for atomic processing */ #else u64 acurs; /* for atomic processing */ #endif } __aligned(8); /* in host byte order, except for flag bitfields in network byte order */ struct smc_host_cdc_msg { /* Connection Data Control message */ struct smc_wr_rx_hdr common; /* .type = 0xFE */ u8 len; /* length = 44 */ u16 seqno; /* connection seq # */ u32 token; /* alert_token */ union smc_host_cursor prod; /* producer cursor */ union smc_host_cursor cons; /* consumer cursor, * piggy backed "ack" */ struct smc_cdc_producer_flags prod_flags; /* conn. tx/rx status */ struct smc_cdc_conn_state_flags conn_state_flags; /* peer conn. status*/ u8 reserved[18]; } __aligned(8); enum smc_urg_state { SMC_URG_VALID = 1, /* data present */ SMC_URG_NOTYET = 2, /* data pending */ SMC_URG_READ = 3, /* data was already read */ }; struct smc_mark_woken { bool woken; void *key; wait_queue_entry_t wait_entry; }; struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ struct smc_link *lnk; /* assigned SMC-R link */ u32 alert_token_local; /* unique conn. id */ u8 peer_rmbe_idx; /* from tcp handshake */ int peer_rmbe_size; /* size of peer rx buffer */ atomic_t peer_rmbe_space;/* remaining free bytes in peer * rmbe */ int rtoken_idx; /* idx to peer RMB rkey/addr */ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ int rmbe_size_comp; /* compressed notation */ int rmbe_update_limit; /* lower limit for consumer * cursor update */ struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging * buffer for CDC msg send * .prod cf. TCP snd_nxt * .cons cf. TCP sends ack */ union smc_host_cursor local_tx_ctrl_fin; /* prod crsr - confirmed by peer */ union smc_host_cursor tx_curs_prep; /* tx - prepared data * snd_max..wmem_alloc */ union smc_host_cursor tx_curs_sent; /* tx - sent data * snd_nxt ? */ union smc_host_cursor tx_curs_fin; /* tx - confirmed by peer * snd-wnd-begin ? */ atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe * - inc when post wqe, * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. TCP rcv_nxt * .cons cf. TCP snd_una */ union smc_host_cursor rx_curs_confirmed; /* confirmed to peer * source of snd_una ? */ union smc_host_cursor urg_curs; /* points at urgent byte */ enum smc_urg_state urg_state; bool urg_tx_pend; /* urgent data staged */ bool urg_rx_skip_pend; /* indicate urgent oob data * read, but previous regular * data still pending */ char urg_rx_byte; /* urgent byte */ bool tx_in_release_sock; /* flush pending tx data in * sock release_cb() */ atomic_t bytes_to_rcv; /* arrived data, * not yet received */ atomic_t splice_pending; /* number of spliced bytes * pending processing */ #ifndef KERNEL_HAS_ATOMIC64 spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ struct work_struct abort_work; /* abort the connection */ struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ u8 rx_off; /* receive offset: * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ }; struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); /* original data_ready fct. */ void (*clcsk_write_space)(struct sock *sk); /* original write_space fct. */ void (*clcsk_error_report)(struct sock *sk); /* original error_report fct. */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ bool limit_smc_hs; /* put constraint on handshake */ bool use_fallback; /* fallback to tcp */ int fallback_rsn; /* reason for fallback */ u32 peer_diagnosis; /* decline reason from peer */ atomic_t queued_smc_hs; /* queued smc handshakes */ struct inet_connection_sock_af_ops af_ops; const struct inet_connection_sock_af_ops *ori_af_ops; /* original af ops */ int sockopt_defer_accept; /* sockopt TCP_DEFER_ACCEPT * value */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close * started, waiting for unsent * data to be sent */ u8 connect_nonblock : 1; /* non-blocking connect in * flight */ struct mutex clcsock_release_lock; /* protects clcsock of a listen * socket * */ }; #define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { smc->clcsk_state_change = NULL; smc->clcsk_data_ready = NULL; smc->clcsk_write_space = NULL; smc->clcsk_error_report = NULL; } static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } /* save target_cb in saved_cb, and replace target_cb with new_cb */ static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), void (*new_cb)(struct sock *), void (**saved_cb)(struct sock *)) { /* only save once */ if (!*saved_cb) *saved_cb = *target_cb; *target_cb = new_cb; } /* restore target_cb to saved_cb, and reset saved_cb to NULL */ static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), void (**saved_cb)(struct sock *)) { if (!*saved_cb) return; *target_cb = *saved_cb; *saved_cb = NULL; } extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ #define SMC_SYSTEMID_LEN 8 extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ #define ntohll(x) be64_to_cpu(x) #define htonll(x) cpu_to_be64(x) /* convert an u32 value into network byte order, store it into a 3 byte field */ static inline void hton24(u8 *net, u32 host) { __be32 t; t = cpu_to_be32(host); memcpy(net, ((u8 *)&t) + 1, 3); } /* convert a received 3 byte field into host byte order*/ static inline u32 ntoh24(u8 *net) { __be32 t = 0; memcpy(((u8 *)&t) + 1, net, 3); return be32_to_cpu(t); } #ifdef CONFIG_XFRM static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { return false; } #endif struct smc_gidlist; struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); void smc_fill_gid_list(struct smc_link_group *lgr, struct smc_gidlist *gidlist, struct smc_ib_device *known_dev, u8 *known_gid); /* smc handshake limitation interface for netlink */ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) { set_bit(flag, &sk->sk_flags); } #endif /* __SMC_H */ |
36 36 20 23 3 2 30 31 2 2 32 32 30 30 46 9 22 7 10 30 19 12 35 3 2 1 25 3 7 7 16 15 7 1 5 1 67 8 59 58 5 11 50 22 2 1 19 1 12 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | // SPDX-License-Identifier: GPL-2.0-only /* * fs/eventfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * */ #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched/signal.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/anon_inodes.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/kref.h> #include <linux/eventfd.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/idr.h> #include <linux/uio.h> static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not * specified, a read(2) will return the "count" value to userspace, * and will reset "count" to zero. The kernel side eventfd_signal() * also, adds to the "count" counter and issue a wakeup. */ __u64 count; unsigned int flags; int id; }; /** * eventfd_signal_mask - Increment the event counter * @ctx: [in] Pointer to the eventfd context. * @mask: [in] poll mask * * This function is supposed to be called by the kernel in paths that do not * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX * value, and we signal this as overflow condition by returning a EPOLLERR * to poll(2). */ void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask) { unsigned long flags; /* * Deadlock or stack overflow issues can happen if we recurse here * through waitqueue wakeup handlers. If the caller users potentially * nested waitqueues with custom wakeup handlers, then it should * check eventfd_signal_allowed() before calling this function. If * it returns false, the eventfd_signal() call should be deferred to a * safe context. */ if (WARN_ON_ONCE(current->in_eventfd)) return; spin_lock_irqsave(&ctx->wqh.lock, flags); current->in_eventfd = 1; if (ctx->count < ULLONG_MAX) ctx->count++; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); current->in_eventfd = 0; spin_unlock_irqrestore(&ctx->wqh.lock, flags); } EXPORT_SYMBOL_GPL(eventfd_signal_mask); static void eventfd_free_ctx(struct eventfd_ctx *ctx) { if (ctx->id >= 0) ida_free(&eventfd_ida, ctx->id); kfree(ctx); } static void eventfd_free(struct kref *kref) { struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); eventfd_free_ctx(ctx); } /** * eventfd_ctx_put - Releases a reference to the internal eventfd context. * @ctx: [in] Pointer to eventfd context. * * The eventfd context reference must have been previously acquired either * with eventfd_ctx_fdget() or eventfd_ctx_fileget(). */ void eventfd_ctx_put(struct eventfd_ctx *ctx) { kref_put(&ctx->kref, eventfd_free); } EXPORT_SYMBOL_GPL(eventfd_ctx_put); static int eventfd_release(struct inode *inode, struct file *file) { struct eventfd_ctx *ctx = file->private_data; wake_up_poll(&ctx->wqh, EPOLLHUP); eventfd_ctx_put(ctx); return 0; } static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; poll_wait(file, &ctx->wqh, wait); /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait * takes that lock (through add_wait_queue) if our caller will sleep. * * The read _can_ therefore seep into add_wait_queue's critical * section, but cannot move above it! add_wait_queue's spin_lock acts * as an acquire barrier and ensures that the read be ordered properly * against the writes. The following CAN happen and is safe: * * poll write * ----------------- ------------ * lock ctx->wqh.lock (in poll_wait) * count = ctx->count * __add_wait_queue * unlock ctx->wqh.lock * lock ctx->qwh.lock * ctx->count += n * if (waitqueue_active) * wake_up_locked_poll * unlock ctx->qwh.lock * eventfd_poll returns 0 * * but the following, which would miss a wakeup, cannot happen: * * poll write * ----------------- ------------ * count = ctx->count (INVALID!) * lock ctx->qwh.lock * ctx->count += n * **waitqueue_active is false** * **no wake_up_locked_poll!** * unlock ctx->qwh.lock * lock ctx->wqh.lock (in poll_wait) * __add_wait_queue * unlock ctx->wqh.lock * eventfd_poll returns 0 */ count = READ_ONCE(ctx->count); if (count > 0) events |= EPOLLIN; if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) events |= EPOLLOUT; return events; } void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { lockdep_assert_held(&ctx->wqh.lock); *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count; ctx->count -= *cnt; } EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); /** * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. * @ctx: [in] Pointer to eventfd context. * @wait: [in] Wait queue to be removed. * @cnt: [out] Pointer to the 64-bit counter value. * * Returns %0 if successful, or the following error codes: * * -EAGAIN : The operation would have blocked. * * This is used to atomically remove a wait queue entry from the eventfd wait * queue head, and read/reset the counter value. */ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt) { unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); eventfd_ctx_do_read(ctx, cnt); __remove_wait_queue(&ctx->wqh, wait); if (*cnt != 0 && waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLOUT); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return *cnt != 0 ? 0 : -EAGAIN; } EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct eventfd_ctx *ctx = file->private_data; __u64 ucnt = 0; if (iov_iter_count(to) < sizeof(ucnt)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (!ctx->count) { if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) { spin_unlock_irq(&ctx->wqh.lock); return -EAGAIN; } if (wait_event_interruptible_locked_irq(ctx->wqh, ctx->count)) { spin_unlock_irq(&ctx->wqh.lock); return -ERESTARTSYS; } } eventfd_ctx_do_read(ctx, &ucnt); current->in_eventfd = 1; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLOUT); current->in_eventfd = 0; spin_unlock_irq(&ctx->wqh.lock); if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt))) return -EFAULT; return sizeof(ucnt); } static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct eventfd_ctx *ctx = file->private_data; ssize_t res; __u64 ucnt; if (count < sizeof(ucnt)) return -EINVAL; if (copy_from_user(&ucnt, buf, sizeof(ucnt))) return -EFAULT; if (ucnt == ULLONG_MAX) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; if (ULLONG_MAX - ctx->count > ucnt) res = sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { res = wait_event_interruptible_locked_irq(ctx->wqh, ULLONG_MAX - ctx->count > ucnt); if (!res) res = sizeof(ucnt); } if (likely(res > 0)) { ctx->count += ucnt; current->in_eventfd = 1; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); current->in_eventfd = 0; } spin_unlock_irq(&ctx->wqh.lock); return res; } #ifdef CONFIG_PROC_FS static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) { struct eventfd_ctx *ctx = f->private_data; spin_lock_irq(&ctx->wqh.lock); seq_printf(m, "eventfd-count: %16llx\n", (unsigned long long)ctx->count); spin_unlock_irq(&ctx->wqh.lock); seq_printf(m, "eventfd-id: %d\n", ctx->id); seq_printf(m, "eventfd-semaphore: %d\n", !!(ctx->flags & EFD_SEMAPHORE)); } #endif static const struct file_operations eventfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, .poll = eventfd_poll, .read_iter = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, }; /** * eventfd_fget - Acquire a reference of an eventfd file descriptor. * @fd: [in] Eventfd file descriptor. * * Returns a pointer to the eventfd file structure in case of success, or the * following error pointer: * * -EBADF : Invalid @fd file descriptor. * -EINVAL : The @fd file descriptor is not an eventfd file. */ struct file *eventfd_fget(int fd) { struct file *file; file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &eventfd_fops) { fput(file); return ERR_PTR(-EINVAL); } return file; } EXPORT_SYMBOL_GPL(eventfd_fget); /** * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. * @fd: [in] Eventfd file descriptor. * * Returns a pointer to the internal eventfd context, otherwise the error * pointers returned by the following functions: * * eventfd_fget */ struct eventfd_ctx *eventfd_ctx_fdget(int fd) { struct eventfd_ctx *ctx; struct fd f = fdget(fd); if (!f.file) return ERR_PTR(-EBADF); ctx = eventfd_ctx_fileget(f.file); fdput(f); return ctx; } EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); /** * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. * @file: [in] Eventfd file pointer. * * Returns a pointer to the internal eventfd context, otherwise the error * pointer: * * -EINVAL : The @fd file descriptor is not an eventfd file. */ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) { struct eventfd_ctx *ctx; if (file->f_op != &eventfd_fops) return ERR_PTR(-EINVAL); ctx = file->private_data; kref_get(&ctx->kref); return ctx; } EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); static int do_eventfd(unsigned int count, int flags) { struct eventfd_ctx *ctx; struct file *file; int fd; /* Check the EFD_* constants for consistency. */ BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); if (flags & ~EFD_FLAGS_SET) return -EINVAL; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL); flags &= EFD_SHARED_FCNTL_FLAGS; flags |= O_RDWR; fd = get_unused_fd_flags(flags); if (fd < 0) goto err; file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); if (IS_ERR(file)) { put_unused_fd(fd); fd = PTR_ERR(file); goto err; } file->f_mode |= FMODE_NOWAIT; fd_install(fd, file); return fd; err: eventfd_free_ctx(ctx); return fd; } SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { return do_eventfd(count, flags); } SYSCALL_DEFINE1(eventfd, unsigned int, count) { return do_eventfd(count, 0); } |
3 1 1 1 5 4 2 2 7 1 6 2 2 2 1 1 2 1 1 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017 Nicira, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/if.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/openvswitch.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <net/netlink.h> #include <net/genetlink.h> #include "datapath.h" #include "meter.h" static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, }; static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, }; static u32 meter_hash(struct dp_meter_instance *ti, u32 id) { return id % ti->n_meters; } static void ovs_meter_free(struct dp_meter *meter) { if (!meter) return; kfree_rcu(meter, rcu); } /* Call with ovs_mutex or RCU read lock. */ static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl, u32 meter_id) { struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); u32 hash = meter_hash(ti, meter_id); struct dp_meter *meter; meter = rcu_dereference_ovsl(ti->dp_meters[hash]); if (meter && likely(meter->id == meter_id)) return meter; return NULL; } static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) { struct dp_meter_instance *ti; ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL); if (!ti) return NULL; ti->n_meters = size; return ti; } static void dp_meter_instance_free(struct dp_meter_instance *ti) { kvfree(ti); } static void dp_meter_instance_free_rcu(struct rcu_head *rcu) { struct dp_meter_instance *ti; ti = container_of(rcu, struct dp_meter_instance, rcu); kvfree(ti); } static int dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size) { struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); int n_meters = min(size, ti->n_meters); struct dp_meter_instance *new_ti; int i; new_ti = dp_meter_instance_alloc(size); if (!new_ti) return -ENOMEM; for (i = 0; i < n_meters; i++) if (rcu_dereference_ovsl(ti->dp_meters[i])) new_ti->dp_meters[i] = ti->dp_meters[i]; rcu_assign_pointer(tbl->ti, new_ti); call_rcu(&ti->rcu, dp_meter_instance_free_rcu); return 0; } static void dp_meter_instance_insert(struct dp_meter_instance *ti, struct dp_meter *meter) { u32 hash; hash = meter_hash(ti, meter->id); rcu_assign_pointer(ti->dp_meters[hash], meter); } static void dp_meter_instance_remove(struct dp_meter_instance *ti, struct dp_meter *meter) { u32 hash; hash = meter_hash(ti, meter->id); RCU_INIT_POINTER(ti->dp_meters[hash], NULL); } static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) { struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); u32 hash = meter_hash(ti, meter->id); int err; /* In generally, slots selected should be empty, because * OvS uses id-pool to fetch a available id. */ if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash]))) return -EBUSY; dp_meter_instance_insert(ti, meter); /* That function is thread-safe. */ tbl->count++; if (tbl->count >= tbl->max_meters_allowed) { err = -EFBIG; goto attach_err; } if (tbl->count >= ti->n_meters && dp_meter_instance_realloc(tbl, ti->n_meters * 2)) { err = -ENOMEM; goto attach_err; } return 0; attach_err: dp_meter_instance_remove(ti, meter); tbl->count--; return err; } static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) { struct dp_meter_instance *ti; ASSERT_OVSL(); if (!meter) return 0; ti = rcu_dereference_ovsl(tbl->ti); dp_meter_instance_remove(ti, meter); tbl->count--; /* Shrink the meter array if necessary. */ if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && tbl->count <= (ti->n_meters / 4)) { int half_size = ti->n_meters / 2; int i; /* Avoid hash collision, don't move slots to other place. * Make sure there are no references of meters in array * which will be released. */ for (i = half_size; i < ti->n_meters; i++) if (rcu_dereference_ovsl(ti->dp_meters[i])) goto out; if (dp_meter_instance_realloc(tbl, half_size)) goto shrink_err; } out: return 0; shrink_err: dp_meter_instance_insert(ti, meter); tbl->count++; return -ENOMEM; } static struct sk_buff * ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, struct ovs_header **ovs_reply_header) { struct sk_buff *skb; struct ovs_header *ovs_header = genl_info_userhdr(info); skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); *ovs_reply_header = genlmsg_put(skb, info->snd_portid, info->snd_seq, &dp_meter_genl_family, 0, cmd); if (!*ovs_reply_header) { nlmsg_free(skb); return ERR_PTR(-EMSGSIZE); } (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; return skb; } static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, struct dp_meter *meter) { struct nlattr *nla; struct dp_meter_band *band; u16 i; if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) goto error; if (nla_put(reply, OVS_METER_ATTR_STATS, sizeof(struct ovs_flow_stats), &meter->stats)) goto error; if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, OVS_METER_ATTR_PAD)) goto error; nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto error; band = meter->bands; for (i = 0; i < meter->n_bands; ++i, ++band) { struct nlattr *band_nla; band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, sizeof(struct ovs_flow_stats), &band->stats)) goto error; nla_nest_end(reply, band_nla); } nla_nest_end(reply, nla); return 0; error: return -EMSGSIZE; } static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; struct nlattr *nla, *band_nla; struct sk_buff *reply; struct datapath *dp; int err = -EMSGSIZE; reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, &ovs_reply_header); if (IS_ERR(reply)) return PTR_ERR(reply); ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = -ENODEV; goto exit_unlock; } if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, dp->meter_tbl.max_meters_allowed)) goto exit_unlock; ovs_unlock(); if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) goto nla_put_failure; nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto nla_put_failure; band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla) goto nla_put_failure; /* Currently only DROP band type is supported. */ if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) goto nla_put_failure; nla_nest_end(reply, band_nla); nla_nest_end(reply, nla); genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); exit_unlock: ovs_unlock(); nla_put_failure: nlmsg_free(reply); return err; } static struct dp_meter *dp_meter_create(struct nlattr **a) { struct nlattr *nla; int rem; u16 n_bands = 0; struct dp_meter *meter; struct dp_meter_band *band; int err; /* Validate attributes, count the bands. */ if (!a[OVS_METER_ATTR_BANDS]) return ERR_PTR(-EINVAL); nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) if (++n_bands > DP_MAX_BANDS) return ERR_PTR(-EINVAL); /* Allocate and set up the meter before locking anything. */ meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT); if (!meter) return ERR_PTR(-ENOMEM); meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); meter->used = div_u64(ktime_get_ns(), 1000 * 1000); meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; spin_lock_init(&meter->lock); if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { meter->stats = *(struct ovs_flow_stats *) nla_data(a[OVS_METER_ATTR_STATS]); } meter->n_bands = n_bands; /* Set up meter bands. */ band = meter->bands; nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; u32 band_max_delta_t; err = nla_parse_deprecated((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, nla_data(nla), nla_len(nla), band_policy, NULL); if (err) goto exit_free_meter; if (!attr[OVS_BAND_ATTR_TYPE] || !attr[OVS_BAND_ATTR_RATE] || !attr[OVS_BAND_ATTR_BURST]) { err = -EINVAL; goto exit_free_meter; } band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); if (band->rate == 0) { err = -EINVAL; goto exit_free_meter; } band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); /* Figure out max delta_t that is enough to fill any bucket. * Keep max_delta_t size to the bucket units: * pkts => 1/1000 packets, kilobits => bits. * * Start with a full bucket. */ band->bucket = band->burst_size * 1000ULL; band_max_delta_t = div_u64(band->bucket, band->rate); if (band_max_delta_t > meter->max_delta_t) meter->max_delta_t = band_max_delta_t; band++; } return meter; exit_free_meter: kfree(meter); return ERR_PTR(err); } static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct dp_meter *meter, *old_meter; struct sk_buff *reply; struct ovs_header *ovs_reply_header; struct ovs_header *ovs_header = genl_info_userhdr(info); struct dp_meter_table *meter_tbl; struct datapath *dp; int err; u32 meter_id; bool failed; if (!a[OVS_METER_ATTR_ID]) return -EINVAL; meter = dp_meter_create(a); if (IS_ERR(meter)) return PTR_ERR(meter); reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, &ovs_reply_header); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto exit_free_meter; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = -ENODEV; goto exit_unlock; } meter_tbl = &dp->meter_tbl; meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); old_meter = lookup_meter(meter_tbl, meter_id); err = detach_meter(meter_tbl, old_meter); if (err) goto exit_unlock; err = attach_meter(meter_tbl, meter); if (err) goto exit_free_old_meter; ovs_unlock(); /* Build response with the meter_id and stats from * the old meter, if any. */ failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); WARN_ON(failed); if (old_meter) { spin_lock_bh(&old_meter->lock); if (old_meter->keep_stats) { err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); WARN_ON(err); } spin_unlock_bh(&old_meter->lock); ovs_meter_free(old_meter); } genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); exit_free_old_meter: ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); exit_free_meter: kfree(meter); return err; } static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; struct nlattr **a = info->attrs; struct dp_meter *meter; struct sk_buff *reply; struct datapath *dp; u32 meter_id; int err; if (!a[OVS_METER_ATTR_ID]) return -EINVAL; meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, &ovs_reply_header); if (IS_ERR(reply)) return PTR_ERR(reply); ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = -ENODEV; goto exit_unlock; } /* Locate meter, copy stats. */ meter = lookup_meter(&dp->meter_tbl, meter_id); if (!meter) { err = -ENOENT; goto exit_unlock; } spin_lock_bh(&meter->lock); err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); spin_unlock_bh(&meter->lock); if (err) goto exit_unlock; ovs_unlock(); genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); exit_unlock: ovs_unlock(); nlmsg_free(reply); return err; } static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = genl_info_userhdr(info); struct ovs_header *ovs_reply_header; struct nlattr **a = info->attrs; struct dp_meter *old_meter; struct sk_buff *reply; struct datapath *dp; u32 meter_id; int err; if (!a[OVS_METER_ATTR_ID]) return -EINVAL; reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, &ovs_reply_header); if (IS_ERR(reply)) return PTR_ERR(reply); ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = -ENODEV; goto exit_unlock; } meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); old_meter = lookup_meter(&dp->meter_tbl, meter_id); if (old_meter) { spin_lock_bh(&old_meter->lock); err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); WARN_ON(err); spin_unlock_bh(&old_meter->lock); err = detach_meter(&dp->meter_tbl, old_meter); if (err) goto exit_unlock; } ovs_unlock(); ovs_meter_free(old_meter); genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); exit_unlock: ovs_unlock(); nlmsg_free(reply); return err; } /* Meter action execution. * * Return true 'meter_id' drop band is triggered. The 'skb' should be * dropped by the caller'. */ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, u32 meter_id) { long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); long long int long_delta_ms; struct dp_meter_band *band; struct dp_meter *meter; int i, band_exceeded_max = -1; u32 band_exceeded_rate = 0; u32 delta_ms; u32 cost; meter = lookup_meter(&dp->meter_tbl, meter_id); /* Do not drop the packet when there is no meter. */ if (!meter) return false; /* Lock the meter while using it. */ spin_lock(&meter->lock); long_delta_ms = (now_ms - meter->used); /* ms */ if (long_delta_ms < 0) { /* This condition means that we have several threads fighting * for a meter lock, and the one who received the packets a * bit later wins. Assuming that all racing threads received * packets at the same time to avoid overflow. */ long_delta_ms = 0; } /* Make sure delta_ms will not be too large, so that bucket will not * wrap around below. */ delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) ? meter->max_delta_t : (u32)long_delta_ms; /* Update meter statistics. */ meter->used = now_ms; meter->stats.n_packets += 1; meter->stats.n_bytes += skb->len; /* Bucket rate is either in kilobits per second, or in packets per * second. We maintain the bucket in the units of either bits or * 1/1000th of a packet, correspondingly. * Then, when rate is multiplied with milliseconds, we get the * bucket units: * msec * kbps = bits, and * msec * packets/sec = 1/1000 packets. * * 'cost' is the number of bucket units in this packet. */ cost = (meter->kbps) ? skb->len * 8 : 1000; /* Update all bands and find the one hit with the highest rate. */ for (i = 0; i < meter->n_bands; ++i) { long long int max_bucket_size; band = &meter->bands[i]; max_bucket_size = band->burst_size * 1000LL; band->bucket += delta_ms * band->rate; if (band->bucket > max_bucket_size) band->bucket = max_bucket_size; if (band->bucket >= cost) { band->bucket -= cost; } else if (band->rate > band_exceeded_rate) { band_exceeded_rate = band->rate; band_exceeded_max = i; } } if (band_exceeded_max >= 0) { /* Update band statistics. */ band = &meter->bands[band_exceeded_max]; band->stats.n_packets += 1; band->stats.n_bytes += skb->len; /* Drop band triggered, let the caller drop the 'skb'. */ if (band->type == OVS_METER_BAND_TYPE_DROP) { spin_unlock(&meter->lock); return true; } } spin_unlock(&meter->lock); return false; } static const struct genl_small_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ .doit = ovs_meter_cmd_del }, }; static const struct genl_multicast_group ovs_meter_multicast_group = { .name = OVS_METER_MCGROUP, }; struct genl_family dp_meter_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, .policy = meter_policy, .netnsok = true, .parallel_ops = true, .small_ops = dp_meter_genl_ops, .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops), .resv_start_op = OVS_METER_CMD_GET + 1, .mcgrps = &ovs_meter_multicast_group, .n_mcgrps = 1, .module = THIS_MODULE, }; int ovs_meters_init(struct datapath *dp) { struct dp_meter_table *tbl = &dp->meter_tbl; struct dp_meter_instance *ti; unsigned long free_mem_bytes; ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); if (!ti) return -ENOMEM; /* Allow meters in a datapath to use ~3.12% of physical memory. */ free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5); tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter), DP_METER_NUM_MAX); if (!tbl->max_meters_allowed) goto out_err; rcu_assign_pointer(tbl->ti, ti); tbl->count = 0; return 0; out_err: dp_meter_instance_free(ti); return -ENOMEM; } void ovs_meters_exit(struct datapath *dp) { struct dp_meter_table *tbl = &dp->meter_tbl; struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti); int i; for (i = 0; i < ti->n_meters; i++) ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i])); dp_meter_instance_free(ti); } |
364 29 3700 6 526 417 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Integer base 2 logarithm calculation * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_LOG2_H #define _LINUX_LOG2_H #include <linux/types.h> #include <linux/bitops.h> /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } #endif /** * is_power_of_2() - check if a value is a power of two * @n: the value to check * * Determine whether some value is a power of two, where zero is * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ static inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); } /** * __roundup_pow_of_two() - round up to nearest power of two * @n: value to round up */ static inline __attribute__((const)) unsigned long __roundup_pow_of_two(unsigned long n) { return 1UL << fls_long(n - 1); } /** * __rounddown_pow_of_two() - round down to nearest power of two * @n: value to round down */ static inline __attribute__((const)) unsigned long __rounddown_pow_of_two(unsigned long n) { return 1UL << (fls_long(n) - 1); } /** * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value * @n: parameter * * Use this where sparse expects a true constant expression, e.g. for array * indices. */ #define const_ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ 1) : \ -1) /** * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value * @n: parameter * * constant-capable log of base 2 calculation * - this can be used to initialise global variables from constant data, hence * the massive ternary operator construction * * selects the appropriately-sized optimised version depending on sizeof(n) */ #define ilog2(n) \ ( \ __builtin_constant_p(n) ? \ ((n) < 2 ? 0 : \ 63 - __builtin_clzll(n)) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) /** * roundup_pow_of_two - round the given value up to nearest power of two * @n: parameter * * round the given value up to the nearest power of two * - the result is undefined when n == 0 * - this can be used to initialise global variables from constant data */ #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ ) /** * rounddown_pow_of_two - round the given value down to nearest power of two * @n: parameter * * round the given value down to the nearest power of two * - the result is undefined when n == 0 * - this can be used to initialise global variables from constant data */ #define rounddown_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ (1UL << ilog2(n))) : \ __rounddown_pow_of_two(n) \ ) static inline __attribute_const__ int __order_base_2(unsigned long n) { return n > 1 ? ilog2(n - 1) + 1 : 0; } /** * order_base_2 - calculate the (rounded up) base 2 order of the argument * @n: parameter * * The first few values calculated by this routine: * ob2(0) = 0 * ob2(1) = 0 * ob2(2) = 1 * ob2(3) = 2 * ob2(4) = 2 * ob2(5) = 3 * ... and so on. */ #define order_base_2(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 0 || (n) == 1) ? 0 : \ ilog2((n) - 1) + 1) : \ __order_base_2(n) \ ) static inline __attribute__((const)) int __bits_per(unsigned long n) { if (n < 2) return 1; if (is_power_of_2(n)) return order_base_2(n) + 1; return order_base_2(n); } /** * bits_per - calculate the number of bits required for the argument * @n: parameter * * This is constant-capable and can be used for compile time * initializations, e.g bitfields. * * The first few values calculated by this routine: * bf(0) = 1 * bf(1) = 1 * bf(2) = 2 * bf(3) = 2 * bf(4) = 3 * ... and so on. */ #define bits_per(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 0 || (n) == 1) \ ? 1 : ilog2(n) + 1 \ ) : \ __bits_per(n) \ ) #endif /* _LINUX_LOG2_H */ |
6822 6822 6455 6393 6810 6806 6808 4446 6804 5540 6798 6813 36 5 6803 5435 5436 5437 5435 6387 5 6401 5448 5435 2420 2418 6397 4301 2422 6386 6390 6403 12 6391 6403 6393 6390 6404 12 35 35 35 15 34 33 34 6391 6390 6391 6391 12 12 6409 6411 6415 6410 15 15 6416 1 35 36 36 36 1 35 34 35 35 35 35 6392 6393 6403 6392 6404 6384 6380 4304 2417 6392 6396 6390 6389 6385 6403 113 112 6339 6 19 1 6402 19 19 6767 8 6425 6798 6805 6408 6424 6411 4 6787 6797 6811 6787 5 6776 8 6780 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/irqflags.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/bug.h> #include "printk_ringbuffer.h" /** * DOC: printk_ringbuffer overview * * Data Structure * -------------- * The printk_ringbuffer is made up of 3 internal ringbuffers: * * desc_ring * A ring of descriptors and their meta data (such as sequence number, * timestamp, loglevel, etc.) as well as internal state information about * the record and logical positions specifying where in the other * ringbuffer the text strings are located. * * text_data_ring * A ring of data blocks. A data block consists of an unsigned long * integer (ID) that maps to a desc_ring index followed by the text * string of the record. * * The internal state information of a descriptor is the key element to allow * readers and writers to locklessly synchronize access to the data. * * Implementation * -------------- * * Descriptor Ring * ~~~~~~~~~~~~~~~ * The descriptor ring is an array of descriptors. A descriptor contains * essential meta data to track the data of a printk record using * blk_lpos structs pointing to associated text data blocks (see * "Data Rings" below). Each descriptor is assigned an ID that maps * directly to index values of the descriptor array and has a state. The ID * and the state are bitwise combined into a single descriptor field named * @state_var, allowing ID and state to be synchronously and atomically * updated. * * Descriptors have four states: * * reserved * A writer is modifying the record. * * committed * The record and all its data are written. A writer can reopen the * descriptor (transitioning it back to reserved), but in the committed * state the data is consistent. * * finalized * The record and all its data are complete and available for reading. A * writer cannot reopen the descriptor. * * reusable * The record exists, but its text and/or meta data may no longer be * available. * * Querying the @state_var of a record requires providing the ID of the * descriptor to query. This can yield a possible fifth (pseudo) state: * * miss * The descriptor being queried has an unexpected ID. * * The descriptor ring has a @tail_id that contains the ID of the oldest * descriptor and @head_id that contains the ID of the newest descriptor. * * When a new descriptor should be created (and the ring is full), the tail * descriptor is invalidated by first transitioning to the reusable state and * then invalidating all tail data blocks up to and including the data blocks * associated with the tail descriptor (for the text ring). Then * @tail_id is advanced, followed by advancing @head_id. And finally the * @state_var of the new descriptor is initialized to the new ID and reserved * state. * * The @tail_id can only be advanced if the new @tail_id would be in the * committed or reusable queried state. This makes it possible that a valid * sequence number of the tail is always available. * * Descriptor Finalization * ~~~~~~~~~~~~~~~~~~~~~~~ * When a writer calls the commit function prb_commit(), record data is * fully stored and is consistent within the ringbuffer. However, a writer can * reopen that record, claiming exclusive access (as with prb_reserve()), and * modify that record. When finished, the writer must again commit the record. * * In order for a record to be made available to readers (and also become * recyclable for writers), it must be finalized. A finalized record cannot be * reopened and can never become "unfinalized". Record finalization can occur * in three different scenarios: * * 1) A writer can simultaneously commit and finalize its record by calling * prb_final_commit() instead of prb_commit(). * * 2) When a new record is reserved and the previous record has been * committed via prb_commit(), that previous record is automatically * finalized. * * 3) When a record is committed via prb_commit() and a newer record * already exists, the record being committed is automatically finalized. * * Data Ring * ~~~~~~~~~ * The text data ring is a byte array composed of data blocks. Data blocks are * referenced by blk_lpos structs that point to the logical position of the * beginning of a data block and the beginning of the next adjacent data * block. Logical positions are mapped directly to index values of the byte * array ringbuffer. * * Each data block consists of an ID followed by the writer data. The ID is * the identifier of a descriptor that is associated with the data block. A * given data block is considered valid if all of the following conditions * are met: * * 1) The descriptor associated with the data block is in the committed * or finalized queried state. * * 2) The blk_lpos struct within the descriptor associated with the data * block references back to the same data block. * * 3) The data block is within the head/tail logical position range. * * If the writer data of a data block would extend beyond the end of the * byte array, only the ID of the data block is stored at the logical * position and the full data block (ID and writer data) is stored at the * beginning of the byte array. The referencing blk_lpos will point to the * ID before the wrap and the next data block will be at the logical * position adjacent the full data block after the wrap. * * Data rings have a @tail_lpos that points to the beginning of the oldest * data block and a @head_lpos that points to the logical position of the * next (not yet existing) data block. * * When a new data block should be created (and the ring is full), tail data * blocks will first be invalidated by putting their associated descriptors * into the reusable state and then pushing the @tail_lpos forward beyond * them. Then the @head_lpos is pushed forward and is associated with a new * descriptor. If a data block is not valid, the @tail_lpos cannot be * advanced beyond it. * * Info Array * ~~~~~~~~~~ * The general meta data of printk records are stored in printk_info structs, * stored in an array with the same number of elements as the descriptor ring. * Each info corresponds to the descriptor of the same index in the * descriptor ring. Info validity is confirmed by evaluating the corresponding * descriptor before and after loading the info. * * Usage * ----- * Here are some simple examples demonstrating writers and readers. For the * examples a global ringbuffer (test_rb) is available (which is not the * actual ringbuffer used by printk):: * * DEFINE_PRINTKRB(test_rb, 15, 5); * * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of * 1 MiB (2 ^ (15 + 5)) for text data. * * Sample writer code:: * * const char *textstr = "message text"; * struct prb_reserved_entry e; * struct printk_record r; * * // specify how much to allocate * prb_rec_init_wr(&r, strlen(textstr) + 1); * * if (prb_reserve(&e, &test_rb, &r)) { * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit and finalize the record * prb_final_commit(&e); * } * * Note that additional writer functions are available to extend a record * after it has been committed but not yet finalized. This can be done as * long as no new records have been reserved and the caller is the same. * * Sample writer code (record extending):: * * // alternate rest of previous example * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit the record (but do not finalize yet) * prb_commit(&e); * } * * ... * * // specify additional 5 bytes text space to extend * prb_rec_init_wr(&r, 5); * * // try to extend, but only if it does not exceed 32 bytes * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) { * snprintf(&r.text_buf[r.info->text_len], * r.text_buf_size - r.info->text_len, "hello"); * * r.info->text_len += 5; * * // commit and finalize the record * prb_final_commit(&e); * } * * Sample reader code:: * * struct printk_info info; * struct printk_record r; * char text_buf[32]; * u64 seq; * * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); * * prb_for_each_record(0, &test_rb, &seq, &r) { * if (info.seq != seq) * pr_warn("lost %llu records\n", info.seq - seq); * * if (info.text_len > r.text_buf_size) { * pr_warn("record %llu text truncated\n", info.seq); * text_buf[r.text_buf_size - 1] = 0; * } * * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, * &text_buf[0]); * } * * Note that additional less convenient reader functions are available to * allow complex record access. * * ABA Issues * ~~~~~~~~~~ * To help avoid ABA issues, descriptors are referenced by IDs (array index * values combined with tagged bits counting array wraps) and data blocks are * referenced by logical positions (array index values combined with tagged * bits counting array wraps). However, on 32-bit systems the number of * tagged bits is relatively small such that an ABA incident is (at least * theoretically) possible. For example, if 4 million maximally sized (1KiB) * printk messages were to occur in NMI context on a 32-bit system, the * interrupted context would not be able to recognize that the 32-bit integer * completely wrapped and thus represents a different data block than the one * the interrupted context expects. * * To help combat this possibility, additional state checking is performed * (such as using cmpxchg() even though set() would suffice). These extra * checks are commented as such and will hopefully catch any ABA issue that * a 32-bit system might experience. * * Memory Barriers * ~~~~~~~~~~~~~~~ * Multiple memory barriers are used. To simplify proving correctness and * generating litmus tests, lines of code related to memory barriers * (loads, stores, and the associated memory barriers) are labeled:: * * LMM(function:letter) * * Comments reference the labels using only the "function:letter" part. * * The memory barrier pairs and their ordering are: * * desc_reserve:D / desc_reserve:B * push descriptor tail (id), then push descriptor head (id) * * desc_reserve:D / data_push_tail:B * push data tail (lpos), then set new descriptor reserved (state) * * desc_reserve:D / desc_push_tail:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:D / prb_first_seq:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:F / desc_read:D * set new descriptor id and reserved (state), then allow writer changes * * data_alloc:A (or data_realloc:A) / desc_read:D * set old descriptor reusable (state), then modify new data block area * * data_alloc:A (or data_realloc:A) / data_push_tail:B * push data tail (lpos), then modify new data block area * * _prb_commit:B / desc_read:B * store writer changes, then set new descriptor committed (state) * * desc_reopen_last:A / _prb_commit:B * set descriptor reserved (state), then read descriptor data * * _prb_commit:B / desc_reserve:D * set new descriptor committed (state), then check descriptor head (id) * * data_push_tail:D / data_push_tail:A * set descriptor reusable (state), then push data tail (lpos) * * desc_push_tail:B / desc_reserve:D * set descriptor reusable (state), then push descriptor tail (id) */ #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) /* Determine the data array index from a logical position. */ #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) /* Determine the desc array index from an ID or sequence number. */ #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) /* Determine how many times the data array has wrapped. */ #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) /* Determine if a logical position refers to a data-less block. */ #define LPOS_DATALESS(lpos) ((lpos) & 1UL) #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ LPOS_DATALESS((blk)->next)) /* Get the logical position at index 0 of the current wrap. */ #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ ((lpos) & ~DATA_SIZE_MASK(data_ring)) /* Get the ID for the same index of the previous wrap as the given ID. */ #define DESC_ID_PREV_WRAP(desc_ring, id) \ DESC_ID((id) - DESCS_COUNT(desc_ring)) /* * A data block: mapped directly to the beginning of the data block area * specified as a logical position within the data ring. * * @id: the ID of the associated descriptor * @data: the writer data * * Note that the size of a data block is only known by its associated * descriptor. */ struct prb_data_block { unsigned long id; char data[]; }; /* * Return the descriptor associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; } /* * Return the printk_info associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; } static struct prb_data_block *to_block(struct prb_data_ring *data_ring, unsigned long begin_lpos) { return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; } /* * Increase the data size to account for data block meta data plus any * padding so that the adjacent data block is aligned on the ID size. */ static unsigned int to_blk_size(unsigned int size) { struct prb_data_block *db = NULL; size += sizeof(*db); size = ALIGN(size, sizeof(db->id)); return size; } /* * Sanity checker for reserve size. The ringbuffer code assumes that a data * block does not exceed the maximum possible size that could fit within the * ringbuffer. This function provides that basic size check so that the * assumption is safe. */ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) { struct prb_data_block *db = NULL; if (size == 0) return true; /* * Ensure the alignment padded size could possibly fit in the data * array. The largest possible data block must still leave room for * at least the ID of the next block. */ size = to_blk_size(size); if (size > DATA_SIZE(data_ring) - sizeof(db->id)) return false; return true; } /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) { if (id != DESC_ID(state_val)) return desc_miss; return DESC_STATE(state_val); } /* * Get a copy of a specified descriptor and return its queried state. If the * descriptor is in an inconsistent state (miss or reserved), the caller can * only expect the descriptor's @state_var field to be valid. * * The sequence number and caller_id can be optionally retrieved. Like all * non-state_var data, they are only valid if the descriptor is in a * consistent state. */ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, unsigned long id, struct prb_desc *desc_out, u64 *seq_out, u32 *caller_id_out) { struct printk_info *info = to_info(desc_ring, id); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; enum desc_state d_state; unsigned long state_val; /* Check the descriptor state. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ d_state = get_desc_state(id, state_val); if (d_state == desc_miss || d_state == desc_reserved) { /* * The descriptor is in an inconsistent state. Set at least * @state_var so that the caller can see the details of * the inconsistent state. */ goto out; } /* * Guarantee the state is loaded before copying the descriptor * content. This avoids copying obsolete descriptor content that might * not apply to the descriptor state. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_read:A reads from _prb_commit:B, then desc_read:C reads * from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * RMB from desc_read:A to desc_read:C */ smp_rmb(); /* LMM(desc_read:B) */ /* * Copy the descriptor data. The data is not valid until the * state has been re-checked. A memcpy() for all of @desc * cannot be used because of the atomic_t @state_var field. */ if (desc_out) { memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ } if (seq_out) *seq_out = info->seq; /* also part of desc_read:C */ if (caller_id_out) *caller_id_out = info->caller_id; /* also part of desc_read:C */ /* * 1. Guarantee the descriptor content is loaded before re-checking * the state. This avoids reading an obsolete descriptor state * that may not apply to the copied content. This pairs with * desc_reserve:F. * * Memory barrier involvement: * * If desc_read:C reads from desc_reserve:G, then desc_read:E * reads from desc_reserve:F. * * Relies on: * * WMB from desc_reserve:F to desc_reserve:G * matching * RMB from desc_read:C to desc_read:E * * 2. Guarantee the record data is loaded before re-checking the * state. This avoids reading an obsolete descriptor state that may * not apply to the copied data. This pairs with data_alloc:A and * data_realloc:A. * * Memory barrier involvement: * * If copy_data:A reads from data_alloc:B, then desc_read:E * reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_alloc:B * matching * RMB from desc_read:C to desc_read:E * * Note: desc_make_reusable:A and data_alloc:B can be different * CPUs. However, the data_alloc:B CPU (which performs the * full memory barrier) must have previously seen * desc_make_reusable:A. */ smp_rmb(); /* LMM(desc_read:D) */ /* * The data has been copied. Return the current descriptor state, * which may have changed since the load above. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ d_state = get_desc_state(id, state_val); out: if (desc_out) atomic_long_set(&desc_out->state_var, state_val); return d_state; } /* * Take a specified descriptor out of the finalized state by attempting * the transition from finalized to reusable. Either this context or some * other context will have been successful. */ static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { unsigned long val_finalized = DESC_SV(id, desc_finalized); unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; atomic_long_cmpxchg_relaxed(state_var, val_finalized, val_reusable); /* LMM(desc_make_reusable:A) */ } /* * Given the text data ring, put the associated descriptor of each * data block from @lpos_begin until @lpos_end into the reusable state. * * If there is any problem making the associated descriptor reusable, either * the descriptor has not yet been finalized or another writer context has * already pushed the tail lpos past the problematic data block. Regardless, * on error the caller can re-load the tail lpos to determine the situation. */ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long lpos_begin, unsigned long lpos_end, unsigned long *lpos_out) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_desc_ring *desc_ring = &rb->desc_ring; struct prb_data_block *blk; enum desc_state d_state; struct prb_desc desc; struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { blk = to_block(data_ring, lpos_begin); /* * Load the block ID from the data block. This is a data race * against a writer that may have newly reserved this data * area. If the loaded value matches a valid descriptor ID, * the blk_lpos of that descriptor will be checked to make * sure it points back to this data block. If the check fails, * the data area has been recycled by another writer. */ id = blk->id; /* LMM(data_make_reusable:A) */ d_state = desc_read(desc_ring, id, &desc, NULL, NULL); /* LMM(data_make_reusable:B) */ switch (d_state) { case desc_miss: case desc_reserved: case desc_committed: return false; case desc_finalized: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; desc_make_reusable(desc_ring, id); break; case desc_reusable: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; break; } /* Advance @lpos_begin to the next data block. */ lpos_begin = blk_lpos->next; } *lpos_out = lpos_begin; return true; } /* * Advance the data ring tail to at least @lpos. This function puts * descriptors into the reusable state if the tail is pushed beyond * their associated data block. */ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) { struct prb_data_ring *data_ring = &rb->text_data_ring; unsigned long tail_lpos_new; unsigned long tail_lpos; unsigned long next_lpos; /* If @lpos is from a data-less block, there is nothing to do. */ if (LPOS_DATALESS(lpos)) return true; /* * Any descriptor states that have transitioned to reusable due to the * data tail being pushed to this loaded value will be visible to this * CPU. This pairs with data_push_tail:D. * * Memory barrier involvement: * * If data_push_tail:A reads from data_push_tail:D, then this CPU can * see desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_push_tail:D * matches * READFROM from data_push_tail:D to data_push_tail:A * thus * READFROM from desc_make_reusable:A to this CPU */ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ /* * Loop until the tail lpos is at or beyond @lpos. This condition * may already be satisfied, resulting in no full memory barrier * from data_push_tail:D being performed. However, since this CPU * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos. */ if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) { /* * 1. Guarantee the block ID loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled data area causing the tail lpos to * have been previously pushed. This pairs with * data_alloc:A and data_realloc:A. * * Memory barrier involvement: * * If data_make_reusable:A reads from data_alloc:B, * then data_push_tail:C reads from * data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to data_alloc:B * matching * RMB from data_make_reusable:A to * data_push_tail:C * * Note: data_push_tail:D and data_alloc:B can be * different CPUs. However, the data_alloc:B * CPU (which performs the full memory * barrier) must have previously seen * data_push_tail:D. * * 2. Guarantee the descriptor state loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled descriptor causing the tail lpos to * have been previously pushed. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If data_make_reusable:B reads from * desc_reserve:F, then data_push_tail:C reads * from data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to desc_reserve:F * matching * RMB from data_make_reusable:B to * data_push_tail:C * * Note: data_push_tail:D and desc_reserve:F can * be different CPUs. However, the * desc_reserve:F CPU (which performs the * full memory barrier) must have previously * seen data_push_tail:D. */ smp_rmb(); /* LMM(data_push_tail:B) */ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos ); /* LMM(data_push_tail:C) */ if (tail_lpos_new == tail_lpos) return false; /* Another CPU pushed the tail. Try again. */ tail_lpos = tail_lpos_new; continue; } /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail lpos. A full * memory barrier is needed since other CPUs may have made * the descriptor states reusable. This pairs with * data_push_tail:A. */ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, next_lpos)) { /* LMM(data_push_tail:D) */ break; } } return true; } /* * Advance the desc ring tail. This function advances the tail by one * descriptor, thus invalidating the oldest descriptor. Before advancing * the tail, the tail descriptor is made reusable and all data blocks up to * and including the descriptor's data block are invalidated (i.e. the data * ring tail is pushed past the data block of the descriptor being made * reusable). */ static bool desc_push_tail(struct printk_ringbuffer *rb, unsigned long tail_id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); switch (d_state) { case desc_miss: /* * If the ID is exactly 1 wrap behind the expected, it is * in the process of being reserved by another writer and * must be considered reserved. */ if (DESC_ID(atomic_long_read(&desc.state_var)) == DESC_ID_PREV_WRAP(desc_ring, tail_id)) { return false; } /* * The ID has changed. Another writer must have pushed the * tail and recycled the descriptor already. Success is * returned because the caller is only interested in the * specified tail being pushed, which it was. */ return true; case desc_reserved: case desc_committed: return false; case desc_finalized: desc_make_reusable(desc_ring, tail_id); break; case desc_reusable: break; } /* * Data blocks must be invalidated before their associated * descriptor can be made available for recycling. Invalidating * them later is not possible because there is no way to trust * data blocks once their associated descriptor is gone. */ if (!data_push_tail(rb, desc.text_blk_lpos.next)) return false; /* * Check the next descriptor after @tail_id before pushing the tail * to it because the tail must always be in a finalized or reusable * state. The implementation of prb_first_seq() relies on this. * * A successful read implies that the next descriptor is less than or * equal to @head_id so there is no risk of pushing the tail past the * head. */ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, NULL, NULL); /* LMM(desc_push_tail:A) */ if (d_state == desc_finalized || d_state == desc_reusable) { /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail ID. This allows * verifying the recycled descriptor state. A full memory * barrier is needed since other CPUs may have made the * descriptor states reusable. This pairs with desc_reserve:D. */ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ } else { /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail ID in the * case that the descriptor has been recycled. This pairs * with desc_reserve:D. * * Memory barrier involvement: * * If desc_push_tail:A reads from desc_reserve:F, then * desc_push_tail:D reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB from desc_push_tail:A to desc_push_tail:D * * Note: desc_push_tail:B and desc_reserve:F can be different * CPUs. However, the desc_reserve:F CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_push_tail:C) */ /* * Re-check the tail ID. The descriptor following @tail_id is * not in an allowed tail state. But if the tail has since * been moved by another CPU, then it does not matter. */ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ return false; } return true; } /* Reserve a new descriptor, invalidating the oldest if necessary. */ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val; unsigned long id_prev_wrap; struct prb_desc *desc; unsigned long head_id; unsigned long id; head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ do { id = DESC_ID(head_id + 1); id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); /* * Guarantee the head ID is read before reading the tail ID. * Since the tail ID is updated before the head ID, this * guarantees that @id_prev_wrap is never ahead of the tail * ID. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If desc_reserve:A reads from desc_reserve:D, then * desc_reserve:C reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:D * matching * RMB from desc_reserve:A to desc_reserve:C * * Note: desc_push_tail:B and desc_reserve:D can be different * CPUs. However, the desc_reserve:D CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_reserve:B) */ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id )) { /* LMM(desc_reserve:C) */ /* * Make space for the new descriptor by * advancing the tail. */ if (!desc_push_tail(rb, id_prev_wrap)) return false; } /* * 1. Guarantee the tail ID is read before validating the * recycled descriptor state. A read memory barrier is * sufficient for this. This pairs with desc_push_tail:B. * * Memory barrier involvement: * * If desc_reserve:C reads from desc_push_tail:B, then * desc_reserve:E reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to desc_push_tail:B * matching * RMB from desc_reserve:C to desc_reserve:E * * Note: desc_make_reusable:A and desc_push_tail:B can be * different CPUs. However, the desc_push_tail:B CPU * (which performs the full memory barrier) must have * previously seen desc_make_reusable:A. * * 2. Guarantee the tail ID is stored before storing the head * ID. This pairs with desc_reserve:B. * * 3. Guarantee any data ring tail changes are stored before * recycling the descriptor. Data ring tail changes can * happen via desc_push_tail()->data_push_tail(). A full * memory barrier is needed since another CPU may have * pushed the data ring tails. This pairs with * data_push_tail:B. * * 4. Guarantee a new tail ID is stored before recycling the * descriptor. A full memory barrier is needed since * another CPU may have pushed the tail ID. This pairs * with desc_push_tail:C and this also pairs with * prb_first_seq:C. * * 5. Guarantee the head ID is stored before trying to * finalize the previous descriptor. This pairs with * _prb_commit:B. */ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, id)); /* LMM(desc_reserve:D) */ desc = to_desc(desc_ring, id); /* * If the descriptor has been recycled, verify the old state val. * See "ABA Issues" about why this verification is performed. */ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ if (prev_state_val && get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { WARN_ON_ONCE(1); return false; } /* * Assign the descriptor a new ID and set its state to reserved. * See "ABA Issues" about why cmpxchg() instead of set() is used. * * Guarantee the new descriptor ID and state is stored before making * any other changes. A write memory barrier is sufficient for this. * This pairs with desc_read:D. */ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ WARN_ON_ONCE(1); return false; } /* Now data in @desc can be modified: LMM(desc_reserve:G) */ *id_out = id; return true; } /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) { unsigned long begin_lpos; unsigned long next_lpos; begin_lpos = lpos; next_lpos = lpos + size; /* First check if the data block does not wrap. */ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); } /* * Allocate a new data block, invalidating the oldest data block(s) * if necessary. This function also associates the data block with * a specified descriptor. */ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long begin_lpos; unsigned long next_lpos; if (size == 0) { /* Specify a data-less block. */ blk_lpos->begin = NO_LPOS; blk_lpos->next = NO_LPOS; return NULL; } size = to_blk_size(size); begin_lpos = atomic_long_read(&data_ring->head_lpos); do { next_lpos = get_next_lpos(data_ring, begin_lpos, size); if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { /* Failed to allocate, specify a data-less block. */ blk_lpos->begin = FAILED_LPOS; blk_lpos->next = FAILED_LPOS; return NULL; } /* * 1. Guarantee any descriptor states that have transitioned * to reusable are stored before modifying the newly * allocated data area. A full memory barrier is needed * since other CPUs may have made the descriptor states * reusable. See data_push_tail:A about why the reusable * states are visible. This pairs with desc_read:D. * * 2. Guarantee any updated tail lpos is stored before * modifying the newly allocated data area. Another CPU may * be in data_make_reusable() and is reading a block ID * from this area. data_make_reusable() can handle reading * a garbage block ID value, but then it must be able to * load a new tail lpos. A full memory barrier is needed * since other CPUs may have updated the tail lpos. This * pairs with data_push_tail:B. */ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, next_lpos)); /* LMM(data_alloc:A) */ blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; } blk_lpos->begin = begin_lpos; blk_lpos->next = next_lpos; return &blk->data[0]; } /* * Try to resize an existing data block associated with the descriptor * specified by @id. If the resized data block should become wrapped, it * copies the old data to the new data block. If @size yields a data block * with the same or less size, the data block is left as is. * * Fail if this is not the last allocated data block or if there is not * enough space or it is not possible make enough space. * * Return a pointer to the beginning of the entire data buffer or NULL on * failure. */ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long head_lpos; unsigned long next_lpos; bool wrapped; /* Reallocation only works if @blk_lpos is the newest data block. */ head_lpos = atomic_long_read(&data_ring->head_lpos); if (head_lpos != blk_lpos->next) return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); size = to_blk_size(size); next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); /* If the data block does not increase, there is nothing to do. */ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { if (wrapped) blk = to_block(data_ring, 0); else blk = to_block(data_ring, blk_lpos->begin); return &blk->data[0]; } if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) return NULL; /* The memory barrier involvement is the same as data_alloc:A. */ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, next_lpos)) { /* LMM(data_realloc:A) */ return NULL; } blk = to_block(data_ring, blk_lpos->begin); if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; if (!wrapped) { /* * Since the allocated space is now in the newly * created wrapping data block, copy the content * from the old data block. */ memcpy(&blk->data[0], &old_blk->data[0], (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); } } blk_lpos->next = next_lpos; return &blk->data[0]; } /* Return the number of bytes used by a data block. */ static unsigned int space_used(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos) { /* Data-less blocks take no space. */ if (BLK_DATALESS(blk_lpos)) return 0; if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * For wrapping data blocks, the trailing (wasted) space is * also counted. */ return (DATA_INDEX(data_ring, blk_lpos->next) + DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * Given @blk_lpos, return a pointer to the writer data from the data block * and calculate the size of the data part. A NULL pointer is returned if * @blk_lpos specifies values that could never be legal. * * This function (used by readers) performs strict validation on the lpos * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static const char *get_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, unsigned int *data_size) { struct prb_data_block *db; /* Data-less data block description. */ if (BLK_DATALESS(blk_lpos)) { if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { *data_size = 0; return ""; } return NULL; } /* Regular data block: @begin less than @next and in same wrap. */ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && blk_lpos->begin < blk_lpos->next) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == DATA_WRAPS(data_ring, blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); /* Illegal block description. */ } else { WARN_ON_ONCE(1); return NULL; } /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { return NULL; } /* A valid data block will always have at least an ID. */ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) return NULL; /* Subtract block ID space from size to reflect data size. */ *data_size -= sizeof(db->id); return &db->data[0]; } /* * Attempt to transition the newest descriptor from committed back to reserved * so that the record can be modified by a writer again. This is only possible * if the descriptor is not yet finalized and the provided @caller_id matches. */ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, u32 caller_id, unsigned long *id_out) { unsigned long prev_state_val; enum desc_state d_state; struct prb_desc desc; struct prb_desc *d; unsigned long id; u32 cid; id = atomic_long_read(&desc_ring->head_id); /* * To reduce unnecessarily reopening, first check if the descriptor * state and caller ID are correct. */ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); if (d_state != desc_committed || cid != caller_id) return NULL; d = to_desc(desc_ring, id); prev_state_val = DESC_SV(id, desc_committed); /* * Guarantee the reserved state is stored before reading any * record data. A full memory barrier is needed because @state_var * modification is followed by reading. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_reopen_last:A reads from _prb_commit:B, then * prb_reserve_in_last:A reads from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * MB If desc_reopen_last:A to prb_reserve_in_last:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ return NULL; } *id_out = id; return d; } /** * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer * used by the newest record. * * @e: The entry structure to setup. * @rb: The ringbuffer to re-reserve and extend data in. * @r: The record structure to allocate buffers for. * @caller_id: The caller ID of the caller (reserving writer). * @max_size: Fail if the extended size would be greater than this. * * This is the public function available to writers to re-reserve and extend * data. * * The writer specifies the text size to extend (not the new total size) by * setting the @text_buf_size field of @r. To ensure proper initialization * of @r, prb_rec_init_wr() should be used. * * This function will fail if @caller_id does not match the caller ID of the * newest record. In that case the caller must reserve new data using * prb_reserve(). * * Context: Any context. Disables local interrupts on success. * Return: true if text data could be extended, otherwise false. * * On success: * * - @r->text_buf points to the beginning of the entire text buffer. * * - @r->text_buf_size is set to the new total size of the buffer. * * - @r->info is not touched so that @r->info->text_len could be used * to append the text. * * - prb_record_text_space() can be used on @e to query the new * actually used space. * * Important: All @r->info fields will already be set with the current values * for the record. I.e. @r->info->text_len will be less than * @text_buf_size. Writers can use @r->info->text_len to know * where concatenation begins and writers should update * @r->info->text_len after concatenating. */ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; unsigned int data_size; struct prb_desc *d; unsigned long id; local_irq_save(e->irqflags); /* Transition the newest descriptor back to the reserved state. */ d = desc_reopen_last(desc_ring, caller_id, &id); if (!d) { local_irq_restore(e->irqflags); goto fail_reopen; } /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ info = to_info(desc_ring, id); /* * Set the @e fields here so that prb_commit() can be used if * anything fails from now on. */ e->rb = rb; e->id = id; /* * desc_reopen_last() checked the caller_id, but there was no * exclusive access at that point. The descriptor may have * changed since then. */ if (caller_id != info->caller_id) goto fail; if (BLK_DATALESS(&d->text_blk_lpos)) { if (WARN_ON_ONCE(info->text_len != 0)) { pr_warn_once("wrong text_len value (%hu, expecting 0)\n", info->text_len); info->text_len = 0; } if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } else { if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) goto fail; /* * Increase the buffer size to include the original size. If * the meta data (@text_len) is not sane, use the full data * block size. */ if (WARN_ON_ONCE(info->text_len > data_size)) { pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", info->text_len, data_size); info->text_len = data_size; } r->text_buf_size += info->text_len; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_realloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } if (r->text_buf_size && !r->text_buf) goto fail; r->info = info; e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: prb_commit(e); /* prb_commit() re-enabled interrupts. */ fail_reopen: /* Make it clear to the caller that the re-reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* * Attempt to finalize a specified descriptor. If this fails, the descriptor * is either already final or it will finalize itself when the writer commits. */ static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) { unsigned long prev_state_val = DESC_SV(id, desc_committed); struct prb_desc *d = to_desc(desc_ring, id); atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ /* Best effort to remember the last finalized @id. */ atomic_long_set(&desc_ring->last_finalized_id, id); } /** * prb_reserve() - Reserve space in the ringbuffer. * * @e: The entry structure to setup. * @rb: The ringbuffer to reserve data in. * @r: The record structure to allocate buffers for. * * This is the public function available to writers to reserve data. * * The writer specifies the text size to reserve by setting the * @text_buf_size field of @r. To ensure proper initialization of @r, * prb_rec_init_wr() should be used. * * Context: Any context. Disables local interrupts on success. * Return: true if at least text data could be allocated, otherwise false. * * On success, the fields @info and @text_buf of @r will be set by this * function and should be filled in by the writer before committing. Also * on success, prb_record_text_space() can be used on @e to query the actual * space used for the text data block. * * Important: @info->text_len needs to be set correctly by the writer in * order for data to be readable and/or extended. Its value * is initialized to 0. */ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; struct prb_desc *d; unsigned long id; u64 seq; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; /* * Descriptors in the reserved state act as blockers to all further * reservations once the desc_ring has fully wrapped. Disable * interrupts during the reserve/commit window in order to minimize * the likelihood of this happening. */ local_irq_save(e->irqflags); if (!desc_reserve(rb, &id)) { /* Descriptor reservation failures are tracked. */ atomic_long_inc(&rb->fail); local_irq_restore(e->irqflags); goto fail; } d = to_desc(desc_ring, id); info = to_info(desc_ring, id); /* * All @info fields (except @seq) are cleared and must be filled in * by the writer. Save @seq before clearing because it is used to * determine the new sequence number. */ seq = info->seq; memset(info, 0, sizeof(*info)); /* * Set the @e fields here so that prb_commit() can be used if * text data allocation fails. */ e->rb = rb; e->id = id; /* * Initialize the sequence number if it has "never been set". * Otherwise just increment it by a full wrap. * * @seq is considered "never been set" if it has a value of 0, * _except_ for @infos[0], which was specially setup by the ringbuffer * initializer and therefore is always considered as set. * * See the "Bootstrap" comment block in printk_ringbuffer.h for * details about how the initializer bootstraps the descriptors. */ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) info->seq = DESC_INDEX(desc_ring, id); else info->seq = seq + DESCS_COUNT(desc_ring); /* * New data is about to be reserved. Once that happens, previous * descriptors are no longer able to be extended. Finalize the * previous descriptor now so that it can be made available to * readers. (For seq==0 there is no previous descriptor.) */ if (info->seq > 0) desc_make_final(desc_ring, DESC_ID(id - 1)); r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ if (r->text_buf_size && !r->text_buf) { prb_commit(e); /* prb_commit() re-enabled interrupts. */ goto fail; } r->info = info; /* Record full text space used by record. */ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: /* Make it clear to the caller that the reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* Commit the data (possibly finalizing it) and restore interrupts. */ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); /* Now the writer has finished all writing: LMM(_prb_commit:A) */ /* * Set the descriptor as committed. See "ABA Issues" about why * cmpxchg() instead of set() is used. * * 1 Guarantee all record data is stored before the descriptor state * is stored as committed. A write memory barrier is sufficient * for this. This pairs with desc_read:B and desc_reopen_last:A. * * 2. Guarantee the descriptor state is stored as committed before * re-checking the head ID in order to possibly finalize this * descriptor. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If prb_commit:A reads from desc_reserve:D, then * desc_make_final:A reads from _prb_commit:B. * * Relies on: * * MB _prb_commit:B to prb_commit:A * matching * MB desc_reserve:D to desc_make_final:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } /* Restore interrupts, the reserve/commit window is finished. */ local_irq_restore(e->irqflags); } /** * prb_commit() - Commit (previously reserved) data to the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit data. * * Note that the data is not yet available to readers until it is finalized. * Finalizing happens automatically when space for the next record is * reserved. * * See prb_final_commit() for a version of this function that finalizes * immediately. * * Context: Any context. Enables local interrupts. */ void prb_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; unsigned long head_id; _prb_commit(e, desc_committed); /* * If this descriptor is no longer the head (i.e. a new record has * been allocated), extending the data for this record is no longer * allowed and therefore it must be finalized. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ if (head_id != e->id) desc_make_final(desc_ring, e->id); } /** * prb_final_commit() - Commit and finalize (previously reserved) data to * the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit+finalize data. * * By finalizing, the data is made immediately available to readers. * * This function should only be used if there are no intentions of extending * this data using prb_reserve_in_last(). * * Context: Any context. Enables local interrupts. */ void prb_final_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; _prb_commit(e, desc_finalized); /* Best effort to remember the last finalized @id. */ atomic_long_set(&desc_ring->last_finalized_id, e->id); } /* * Count the number of lines in provided text. All text has at least 1 line * (even if @text_size is 0). Each '\n' processed is counted as an additional * line. */ static unsigned int count_lines(const char *text, unsigned int text_size) { unsigned int next_size = text_size; unsigned int line_count = 1; const char *next = text; while (next_size) { next = memchr(next, '\n', next_size); if (!next) break; line_count++; next++; next_size = text_size - (next - text); } return line_count; } /* * Given @blk_lpos, copy an expected @len of data into the provided buffer. * If @line_count is provided, count the number of lines in the data. * * This function (used by readers) performs strict validation on the data * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static bool copy_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, unsigned int buf_size, unsigned int *line_count) { unsigned int data_size; const char *data; /* Caller might not want any data. */ if ((!buf || !buf_size) && !line_count) return true; data = get_data(data_ring, blk_lpos, &data_size); if (!data) return false; /* * Actual cannot be less than expected. It can be more than expected * because of the trailing alignment padding. * * Note that invalid @len values can occur because the caller loads * the value during an allowed data race. */ if (data_size < (unsigned int)len) return false; /* Caller interested in the line count? */ if (line_count) *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) return true; data_size = min_t(unsigned int, buf_size, len); memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ return true; } /* * This is an extended version of desc_read(). It gets a copy of a specified * descriptor. However, it also verifies that the record is finalized and has * the sequence number @seq. On success, 0 is returned. * * Error return values: * -EINVAL: A finalized record with sequence number @seq does not exist. * -ENOENT: A finalized record with sequence number @seq exists, but its data * is not available. This is a valid record, so readers should * continue with the next record. */ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, unsigned long id, u64 seq, struct prb_desc *desc_out) { struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; enum desc_state d_state; u64 s; d_state = desc_read(desc_ring, id, desc_out, &s, NULL); /* * An unexpected @id (desc_miss) or @seq mismatch means the record * does not exist. A descriptor in the reserved or committed state * means the record does not yet exist for the reader. */ if (d_state == desc_miss || d_state == desc_reserved || d_state == desc_committed || s != seq) { return -EINVAL; } /* * A descriptor in the reusable state may no longer have its data * available; report it as existing but with lost data. Or the record * may actually be a record with lost data. */ if (d_state == desc_reusable || (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { return -ENOENT; } return 0; } /* * Copy the ringbuffer data from the record with @seq to the provided * @r buffer. On success, 0 is returned. * * See desc_read_finalized_seq() for error return values. */ static int prb_read(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r, unsigned int *line_count) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info = to_info(desc_ring, seq); struct prb_desc *rdesc = to_desc(desc_ring, seq); atomic_long_t *state_var = &rdesc->state_var; struct prb_desc desc; unsigned long id; int err; /* Extract the ID, used to specify the descriptor to read. */ id = DESC_ID(atomic_long_read(state_var)); /* Get a local copy of the correct descriptor (if available). */ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); /* * If @r is NULL, the caller is only interested in the availability * of the record. */ if (err || !r) return err; /* If requested, copy meta data. */ if (r->info) memcpy(r->info, info, sizeof(*(r->info))); /* Copy text data. If it fails, this is a data-less record. */ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, r->text_buf, r->text_buf_size, line_count)) { return -ENOENT; } /* Ensure the record is still finalized and has the same @seq. */ return desc_read_finalized_seq(desc_ring, id, seq, &desc); } /* Get the sequence number of the tail descriptor. */ static u64 prb_first_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; unsigned long id; u64 seq; for (;;) { id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ /* * This loop will not be infinite because the tail is * _always_ in the finalized or reusable state. */ if (d_state == desc_finalized || d_state == desc_reusable) break; /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail in the case * that the descriptor has been recycled. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If prb_first_seq:B reads from desc_reserve:F, then * prb_first_seq:A reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB prb_first_seq:B to prb_first_seq:A */ smp_rmb(); /* LMM(prb_first_seq:C) */ } return seq; } /* * Non-blocking read of a record. Updates @seq to the last finalized record * (which may have no data available). * * See the description of prb_read_valid() and prb_read_valid_info() * for details. */ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count) { u64 tail_seq; int err; while ((err = prb_read(rb, *seq, r, line_count))) { tail_seq = prb_first_seq(rb); if (*seq < tail_seq) { /* * Behind the tail. Catch up and try again. This * can happen for -ENOENT and -EINVAL cases. */ *seq = tail_seq; } else if (err == -ENOENT) { /* Record exists, but no data available. Skip. */ (*seq)++; } else { /* Non-existent/non-finalized record. Must stop. */ return false; } } return true; } /** * prb_read_valid() - Non-blocking read of a requested record or (if gone) * the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @r: A record data buffer to store the read record to. * * This is the public function available to readers to read a record. * * The reader provides the @info and @text_buf buffers of @r to be * filled in. Any of the buffer pointers can be set to NULL if the reader * is not interested in that data. To ensure proper initialization of @r, * prb_rec_init_rd() should be used. * * Context: Any context. * Return: true if a record was read, otherwise false. * * On success, the reader must check r->info.seq to see which record was * actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a not yet written record. */ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r) { return _prb_read_valid(rb, &seq, r, NULL); } /** * prb_read_valid_info() - Non-blocking read of meta data for a requested * record or (if gone) the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @info: A buffer to store the read record meta data to. * @line_count: A buffer to store the number of lines in the record text. * * This is the public function available to readers to read only the * meta data of a record. * * The reader provides the @info, @line_count buffers to be filled in. * Either of the buffer pointers can be set to NULL if the reader is not * interested in that data. * * Context: Any context. * Return: true if a record's meta data was read, otherwise false. * * On success, the reader must check info->seq to see which record meta data * was actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a not yet written record. */ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count) { struct printk_record r; prb_rec_init_rd(&r, info, NULL, 0); return _prb_read_valid(rb, &seq, &r, line_count); } /** * prb_first_valid_seq() - Get the sequence number of the oldest available * record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the * first/oldest valid sequence number is. * * This provides readers a starting point to begin iterating the ringbuffer. * * Context: Any context. * Return: The sequence number of the first/oldest record or, if the * ringbuffer is empty, 0 is returned. */ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) { u64 seq = 0; if (!_prb_read_valid(rb, &seq, NULL, NULL)) return 0; return seq; } /** * prb_next_seq() - Get the sequence number after the last available record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the next * newest sequence number available to readers will be. * * This provides readers a sequence number to jump to if all currently * available records should be skipped. * * Context: Any context. * Return: The sequence number of the next newest (not yet available) record * for readers. */ u64 prb_next_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; unsigned long id; u64 seq; /* Check if the cached @id still points to a valid @seq. */ id = atomic_long_read(&desc_ring->last_finalized_id); d_state = desc_read(desc_ring, id, NULL, &seq, NULL); if (d_state == desc_finalized || d_state == desc_reusable) { /* * Begin searching after the last finalized record. * * On 0, the search must begin at 0 because of hack#2 * of the bootstrapping phase it is not known if a * record at index 0 exists. */ if (seq != 0) seq++; } else { /* * The information about the last finalized sequence number * has gone. It should happen only when there is a flood of * new messages and the ringbuffer is rapidly recycled. * Give up and start from the beginning. */ seq = 0; } /* * The information about the last finalized @seq might be inaccurate. * Search forward to find the current one. */ while (_prb_read_valid(rb, &seq, NULL, NULL)) seq++; return seq; } /** * prb_init() - Initialize a ringbuffer to use provided external buffers. * * @rb: The ringbuffer to initialize. * @text_buf: The data buffer for text data. * @textbits: The size of @text_buf as a power-of-2 value. * @descs: The descriptor buffer for ringbuffer records. * @descbits: The count of @descs items as a power-of-2 value. * @infos: The printk_info buffer for ringbuffer records. * * This is the public function available to writers to setup a ringbuffer * during runtime using provided buffers. * * This must match the initialization of DEFINE_PRINTKRB(). * * Context: Any context. */ void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int textbits, struct prb_desc *descs, unsigned int descbits, struct printk_info *infos) { memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); rb->desc_ring.count_bits = descbits; rb->desc_ring.descs = descs; rb->desc_ring.infos = infos; atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); rb->text_data_ring.size_bits = textbits; rb->text_data_ring.data = text_buf; atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->fail, 0); atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; infos[0].seq = -(u64)_DESCS_COUNT(descbits); infos[_DESCS_COUNT(descbits) - 1].seq = 0; } /** * prb_record_text_space() - Query the full actual used ringbuffer space for * the text data of a reserved entry. * * @e: The successfully reserved entry to query. * * This is the public function available to writers to see how much actual * space is used in the ringbuffer to store the text data of the specified * entry. * * This function is only valid if @e has been successfully reserved using * prb_reserve(). * * Context: Any context. * Return: The size in bytes used by the text data of the associated record. */ unsigned int prb_record_text_space(struct prb_reserved_entry *e) { return e->text_space; } |
25 3 35 26 23 19 22 23 29 29 11 12 9 16 16 15 1 6 5 1 1 1 1 1 1 1 1 2 2 2 2 3 2 2 2 16 7 12 15 15 26 14 17 26 15 8 26 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 | // SPDX-License-Identifier: GPL-2.0+ /* * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. */ #include <linux/fs.h> #include <linux/string.h> #include <linux/errno.h> #include "nilfs.h" #include "bmap.h" #include "btree.h" #include "direct.h" #include "btnode.h" #include "mdt.h" #include "dat.h" #include "alloc.h" struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info; return nilfs->ns_dat; } static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, const char *fname, int err) { struct inode *inode = bmap->b_inode; if (err == -EINVAL) { __nilfs_error(inode->i_sb, fname, "broken bmap (inode number=%lu)", inode->i_ino); err = -EIO; } return err; } /** * nilfs_bmap_lookup_at_level - find a data block or node block * @bmap: bmap * @key: key * @level: level * @ptrp: place to store the value associated to @key * * Description: nilfs_bmap_lookup_at_level() finds a record whose key * matches @key in the block at @level of the bmap. * * Return Value: On success, 0 is returned and the record associated with @key * is stored in the place pointed by @ptrp. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - A record associated with @key does not exist. */ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) { sector_t blocknr; int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); if (ret < 0) goto out; if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); if (!ret) *ptrp = blocknr; else if (ret == -ENOENT) { /* * If there was no valid entry in DAT for the block * address obtained by b_ops->bop_lookup, then pass * internal code -EINVAL to nilfs_bmap_convert_error * to treat it as metadata corruption. */ ret = -EINVAL; } } out: up_read(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, unsigned int maxblocks) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); up_read(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; __u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1]; int ret, n; if (bmap->b_ops->bop_check_insert != NULL) { ret = bmap->b_ops->bop_check_insert(bmap, key); if (ret > 0) { n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1); if (n < 0) return n; ret = nilfs_btree_convert_and_insert( bmap, key, ptr, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags |= NILFS_BMAP_LARGE; return ret; } else if (ret < 0) return ret; } return bmap->b_ops->bop_insert(bmap, key, ptr); } /** * nilfs_bmap_insert - insert a new key-record pair into a bmap * @bmap: bmap * @key: key * @rec: record * * Description: nilfs_bmap_insert() inserts the new key-record pair specified * by @key and @rec into @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-EEXIST - A record associated with @key already exist. */ int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_insert(bmap, key, rec); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) { __u64 keys[NILFS_BMAP_LARGE_LOW + 1]; __u64 ptrs[NILFS_BMAP_LARGE_LOW + 1]; int ret, n; if (bmap->b_ops->bop_check_delete != NULL) { ret = bmap->b_ops->bop_check_delete(bmap, key); if (ret > 0) { n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1); if (n < 0) return n; ret = nilfs_direct_delete_and_convert( bmap, key, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; return ret; } else if (ret < 0) return ret; } return bmap->b_ops->bop_delete(bmap, key); } /** * nilfs_bmap_seek_key - seek a valid entry and return its key * @bmap: bmap struct * @start: start key number * @keyp: place to store valid key * * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap * starting from @start, and stores it to @keyp if found. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - No valid entry was found */ int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_seek_key(bmap, start, keyp); up_read(&bmap->b_sem); if (ret < 0) ret = nilfs_bmap_convert_error(bmap, __func__, ret); return ret; } int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp) { int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_last_key(bmap, keyp); up_read(&bmap->b_sem); if (ret < 0) ret = nilfs_bmap_convert_error(bmap, __func__, ret); return ret; } /** * nilfs_bmap_delete - delete a key-record pair from a bmap * @bmap: bmap * @key: key * * Description: nilfs_bmap_delete() deletes the key-record pair specified by * @key from @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - A record associated with @key does not exist. */ int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_delete(bmap, key); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key) { __u64 lastkey; int ret; ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; return ret; } while (key <= lastkey) { ret = nilfs_bmap_do_delete(bmap, lastkey); if (ret < 0) return ret; ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; return ret; } } return 0; } /** * nilfs_bmap_truncate - truncate a bmap to a specified key * @bmap: bmap * @key: key * * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are * greater than or equal to @key from @bmap. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_do_truncate(bmap, key); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_clear - free resources a bmap holds * @bmap: bmap * * Description: nilfs_bmap_clear() frees resources associated with @bmap. */ void nilfs_bmap_clear(struct nilfs_bmap *bmap) { down_write(&bmap->b_sem); if (bmap->b_ops->bop_clear != NULL) bmap->b_ops->bop_clear(bmap); up_write(&bmap->b_sem); } /** * nilfs_bmap_propagate - propagate dirty state * @bmap: bmap * @bh: buffer head * * Description: nilfs_bmap_propagate() marks the buffers that directly or * indirectly refer to the block specified by @bh dirty. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { int ret; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_lookup_dirty_buffers - * @bmap: bmap * @listp: pointer to buffer head list */ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, struct list_head *listp) { if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp); } /** * nilfs_bmap_assign - assign a new block number to a block * @bmap: bmap * @bh: pointer to buffer head * @blocknr: block number * @binfo: block information * * Description: nilfs_bmap_assign() assigns the block number @blocknr to the * buffer specified by @bh. * * Return Value: On success, 0 is returned and the buffer head of a newly * create buffer and the block information associated with the buffer are * stored in the place pointed by @bh and @binfo, respectively. On error, one * of the following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_assign(struct nilfs_bmap *bmap, struct buffer_head **bh, unsigned long blocknr, union nilfs_binfo *binfo) { int ret; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_mark - mark block dirty * @bmap: bmap * @key: key * @level: level * * Description: nilfs_bmap_mark() marks the block specified by @key and @level * as dirty. * * Return Value: On success, 0 is returned. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) { int ret; if (bmap->b_ops->bop_mark == NULL) return 0; down_write(&bmap->b_sem); ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); return nilfs_bmap_convert_error(bmap, __func__, ret); } /** * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state * @bmap: bmap * * Description: nilfs_test_and_clear() is the atomic operation to test and * clear the dirty state of @bmap. * * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. */ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) { int ret; down_write(&bmap->b_sem); ret = nilfs_bmap_dirty(bmap); nilfs_bmap_clear_dirty(bmap); up_write(&bmap->b_sem); return ret; } /* * Internal use only */ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { struct buffer_head *pbh; __u64 key; key = page_index(bh->b_page) << (PAGE_SHIFT - bmap->b_inode->i_blkbits); for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) key++; return key; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) { __s64 diff; diff = key - bmap->b_last_allocated_key; if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) && (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) && (bmap->b_last_allocated_ptr + diff > 0)) return bmap->b_last_allocated_ptr + diff; else return NILFS_BMAP_INVALID_PTR; } #define NILFS_BMAP_GROUP_DIV 8 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) { struct inode *dat = nilfs_bmap_get_dat(bmap); unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat); unsigned long group = bmap->b_inode->i_ino / entries_per_group; return group * entries_per_group + (bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) * (entries_per_group / NILFS_BMAP_GROUP_DIV); } static struct lock_class_key nilfs_bmap_dat_lock_key; static struct lock_class_key nilfs_bmap_mdt_lock_key; /** * nilfs_bmap_read - read a bmap from an inode * @bmap: bmap * @raw_inode: on-disk inode * * Description: nilfs_bmap_read() initializes the bmap @bmap. * * Return Value: On success, 0 is returned. On error, the following negative * error code is returned. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { if (raw_inode == NULL) memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE); else memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_state = 0; bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; switch (bmap->b_inode->i_ino) { case NILFS_DAT_INO: bmap->b_ptr_type = NILFS_BMAP_PTR_P; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: bmap->b_ptr_type = NILFS_BMAP_PTR_VS; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); break; case NILFS_IFILE_INO: lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key); fallthrough; default: bmap->b_ptr_type = NILFS_BMAP_PTR_VM; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; } return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? nilfs_btree_init(bmap) : nilfs_direct_init(bmap); } /** * nilfs_bmap_write - write back a bmap to an inode * @bmap: bmap * @raw_inode: on-disk inode * * Description: nilfs_bmap_write() stores @bmap in @raw_inode. */ void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { down_write(&bmap->b_sem); memcpy(raw_inode->i_bmap, bmap->b_u.u_data, NILFS_INODE_BMAP_SIZE * sizeof(__le64)); if (bmap->b_inode->i_ino == NILFS_DAT_INO) bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; up_write(&bmap->b_sem); } void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) { memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; bmap->b_ptr_type = NILFS_BMAP_PTR_U; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; bmap->b_state = 0; nilfs_btree_init_gc(bmap); } void nilfs_bmap_save(const struct nilfs_bmap *bmap, struct nilfs_bmap_store *store) { memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); store->last_allocated_key = bmap->b_last_allocated_key; store->last_allocated_ptr = bmap->b_last_allocated_ptr; store->state = bmap->b_state; } void nilfs_bmap_restore(struct nilfs_bmap *bmap, const struct nilfs_bmap_store *store) { memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); bmap->b_last_allocated_key = store->last_allocated_key; bmap->b_last_allocated_ptr = store->last_allocated_ptr; bmap->b_state = store->state; } |
2 11 1 1 1 9 4 6 1 1 10 10 6 1 1 3 7 2 1 8 9 9 9 1 8 8 1 8 1 26 21 10 11 5 9 19 1 14 4 12 3 12 1 11 2 2 11 1 1 1 1 1 3 1 1 1 17 17 12 12 1 12 30 29 12 12 7 28 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ #include <linux/slab.h> #include <linux/compat.h> #include <linux/cred.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/msdos_fs.h> #include <linux/writeback.h> #include "exfat_raw.h" #include "exfat_fs.h" static int exfat_cont_expand(struct inode *inode, loff_t size) { int ret; unsigned int num_clusters, new_num_clusters, last_clu; struct exfat_inode_info *ei = EXFAT_I(inode); struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_chain clu; ret = inode_newsize_ok(inode, size); if (ret) return ret; num_clusters = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi); if (new_num_clusters == num_clusters) goto out; if (num_clusters) { exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags); ret = exfat_find_last_cluster(sb, &clu, &last_clu); if (ret) return ret; clu.dir = last_clu + 1; } else { last_clu = EXFAT_EOF_CLUSTER; clu.dir = EXFAT_EOF_CLUSTER; } clu.size = 0; clu.flags = ei->flags; ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters, &clu, IS_DIRSYNC(inode)); if (ret) return ret; /* Append new clusters to chain */ if (num_clusters) { if (clu.flags != ei->flags) if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters)) goto free_clu; if (clu.flags == ALLOC_FAT_CHAIN) if (exfat_ent_set(sb, last_clu, clu.dir)) goto free_clu; } else ei->start_clu = clu.dir; ei->flags = clu.flags; out: inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); /* Expanded range not zeroed, do not update valid_size */ i_size_write(inode, size); ei->i_size_aligned = round_up(size, sb->s_blocksize); ei->i_size_ondisk = ei->i_size_aligned; inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; if (IS_DIRSYNC(inode)) return write_inode_now(inode, 1); mark_inode_dirty(inode); return 0; free_clu: exfat_free_cluster(inode, &clu); return -EIO; } static bool exfat_allow_set_time(struct exfat_sb_info *sbi, struct inode *inode) { mode_t allow_utime = sbi->options.allow_utime; if (!uid_eq(current_fsuid(), inode->i_uid)) { if (in_group_p(inode->i_gid)) allow_utime >>= 3; if (allow_utime & MAY_WRITE) return true; } /* use a default check */ return false; } static int exfat_sanitize_mode(const struct exfat_sb_info *sbi, struct inode *inode, umode_t *mode_ptr) { mode_t i_mode, mask, perm; i_mode = inode->i_mode; mask = (S_ISREG(i_mode) || S_ISLNK(i_mode)) ? sbi->options.fs_fmask : sbi->options.fs_dmask; perm = *mode_ptr & ~(S_IFMT | mask); /* Of the r and x bits, all (subject to umask) must be present.*/ if ((perm & 0555) != (i_mode & 0555)) return -EPERM; if (exfat_mode_can_hold_ro(inode)) { /* * Of the w bits, either all (subject to umask) or none must * be present. */ if ((perm & 0222) && ((perm & 0222) != (0222 & ~mask))) return -EPERM; } else { /* * If exfat_mode_can_hold_ro(inode) is false, can't change * w bits. */ if ((perm & 0222) != (0222 & ~mask)) return -EPERM; } *mode_ptr &= S_IFMT | perm; return 0; } /* resize the file length */ int __exfat_truncate(struct inode *inode) { unsigned int num_clusters_new, num_clusters_phys; unsigned int last_clu = EXFAT_FREE_CLUSTER; struct exfat_chain clu; struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); /* check if the given file ID is opened */ if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) return -EPERM; exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); if (i_size_read(inode) > 0) { /* * Truncate FAT chain num_clusters after the first cluster * num_clusters = min(new, phys); */ unsigned int num_clusters = min(num_clusters_new, num_clusters_phys); /* * Follow FAT chain * (defensive coding - works fine even with corrupted FAT table */ if (clu.flags == ALLOC_NO_FAT_CHAIN) { clu.dir += num_clusters; clu.size -= num_clusters; } else { while (num_clusters > 0) { last_clu = clu.dir; if (exfat_get_next_cluster(sb, &(clu.dir))) return -EIO; num_clusters--; clu.size--; } } } else { ei->flags = ALLOC_NO_FAT_CHAIN; ei->start_clu = EXFAT_EOF_CLUSTER; } if (i_size_read(inode) < ei->valid_size) ei->valid_size = i_size_read(inode); if (ei->type == TYPE_FILE) ei->attr |= EXFAT_ATTR_ARCHIVE; /* * update the directory entry * * If the directory entry is updated by mark_inode_dirty(), the * directory entry will be written after a writeback cycle of * updating the bitmap/FAT, which may result in clusters being * freed but referenced by the directory entry in the event of a * sudden power failure. * __exfat_write_inode() is called for directory entry, bitmap * and FAT to be written in a same writeback. */ if (__exfat_write_inode(inode, inode_needs_sync(inode))) return -EIO; /* cut off from the FAT chain */ if (ei->flags == ALLOC_FAT_CHAIN && last_clu != EXFAT_FREE_CLUSTER && last_clu != EXFAT_EOF_CLUSTER) { if (exfat_ent_set(sb, last_clu, EXFAT_EOF_CLUSTER)) return -EIO; } /* invalidate cache and free the clusters */ /* clear exfat cache */ exfat_cache_inval_inode(inode); /* hint information */ ei->hint_bmap.off = EXFAT_EOF_CLUSTER; ei->hint_bmap.clu = EXFAT_EOF_CLUSTER; /* hint_stat will be used if this is directory. */ ei->hint_stat.eidx = 0; ei->hint_stat.clu = ei->start_clu; ei->hint_femp.eidx = EXFAT_HINT_NONE; /* free the clusters */ if (exfat_free_cluster(inode, &clu)) return -EIO; return 0; } void exfat_truncate(struct inode *inode) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *ei = EXFAT_I(inode); unsigned int blocksize = i_blocksize(inode); loff_t aligned_size; int err; mutex_lock(&sbi->s_lock); if (ei->start_clu == 0) { /* * Empty start_clu != ~0 (not allocated) */ exfat_fs_error(sb, "tried to truncate zeroed cluster."); goto write_size; } err = __exfat_truncate(inode); if (err) goto write_size; inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9; write_size: aligned_size = i_size_read(inode); if (aligned_size & (blocksize - 1)) { aligned_size |= (blocksize - 1); aligned_size++; } if (ei->i_size_ondisk > i_size_read(inode)) ei->i_size_ondisk = aligned_size; if (ei->i_size_aligned > i_size_read(inode)) ei->i_size_aligned = aligned_size; mutex_unlock(&sbi->s_lock); } int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, unsigned int request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); struct exfat_inode_info *ei = EXFAT_I(inode); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); exfat_truncate_atime(&stat->atime); stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = ei->i_crtime.tv_sec; stat->btime.tv_nsec = ei->i_crtime.tv_nsec; stat->blksize = EXFAT_SB(inode->i_sb)->cluster_size; return 0; } int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; unsigned int ia_valid; int error; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > i_size_read(inode)) { error = exfat_cont_expand(inode, attr->ia_size); if (error || attr->ia_valid == ATTR_SIZE) return error; attr->ia_valid &= ~ATTR_SIZE; } /* Check for setting the inode time. */ ia_valid = attr->ia_valid; if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) && exfat_allow_set_time(sbi, inode)) { attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET); } error = setattr_prepare(&nop_mnt_idmap, dentry, attr); attr->ia_valid = ia_valid; if (error) goto out; if (((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | 0777)))) { error = -EPERM; goto out; } /* * We don't return -EPERM here. Yes, strange, but this is too * old behavior. */ if (attr->ia_valid & ATTR_MODE) { if (exfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) attr->ia_valid &= ~ATTR_MODE; } if (attr->ia_valid & ATTR_SIZE) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); setattr_copy(&nop_mnt_idmap, inode, attr); exfat_truncate_inode_atime(inode); if (attr->ia_valid & ATTR_SIZE) { error = exfat_block_truncate_page(inode, attr->ia_size); if (error) goto out; down_write(&EXFAT_I(inode)->truncate_lock); truncate_setsize(inode, attr->ia_size); /* * __exfat_write_inode() is called from exfat_truncate(), inode * is already written by it, so mark_inode_dirty() is unneeded. */ exfat_truncate(inode); up_write(&EXFAT_I(inode)->truncate_lock); } else mark_inode_dirty(inode); out: return error; } /* * modified ioctls from fat/file.c by Welmer Almesberger */ static int exfat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; inode_lock_shared(inode); attr = exfat_make_attr(inode); inode_unlock_shared(inode); return put_user(attr, user_attr); } static int exfat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) { struct inode *inode = file_inode(file); struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); int is_dir = S_ISDIR(inode->i_mode); u32 attr, oldattr; struct iattr ia; int err; err = get_user(attr, user_attr); if (err) goto out; err = mnt_want_write_file(file); if (err) goto out; inode_lock(inode); oldattr = exfat_make_attr(inode); /* * Mask attributes so we don't set reserved fields. */ attr &= (EXFAT_ATTR_READONLY | EXFAT_ATTR_HIDDEN | EXFAT_ATTR_SYSTEM | EXFAT_ATTR_ARCHIVE); attr |= (is_dir ? EXFAT_ATTR_SUBDIR : 0); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_time(inode); if (is_dir) ia.ia_mode = exfat_make_mode(sbi, attr, 0777); else ia.ia_mode = exfat_make_mode(sbi, attr, 0666 | (inode->i_mode & 0111)); /* The root directory has no attributes */ if (inode->i_ino == EXFAT_ROOT_INO && attr != EXFAT_ATTR_SUBDIR) { err = -EINVAL; goto out_unlock_inode; } if (((attr | oldattr) & EXFAT_ATTR_SYSTEM) && !capable(CAP_LINUX_IMMUTABLE)) { err = -EPERM; goto out_unlock_inode; } /* * The security check is questionable... We single * out the RO attribute for checking by the security * module, just because it maps to a file mode. */ err = security_inode_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia); if (err) goto out_unlock_inode; /* This MUST be done before doing anything irreversible... */ err = exfat_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia); if (err) goto out_unlock_inode; fsnotify_change(file->f_path.dentry, ia.ia_valid); exfat_save_attr(inode, attr); mark_inode_dirty(inode); out_unlock_inode: inode_unlock(inode); mnt_drop_write_file(file); out: return err; } static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg) { struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!bdev_max_discard_sectors(inode->i_sb->s_bdev)) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max_t(unsigned int, range.minlen, bdev_discard_granularity(inode->i_sb->s_bdev)); ret = exfat_trim_fs(inode, &range); if (ret < 0) return ret; if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; return 0; } long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); u32 __user *user_attr = (u32 __user *)arg; switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: return exfat_ioctl_get_attributes(inode, user_attr); case FAT_IOCTL_SET_ATTRIBUTES: return exfat_ioctl_set_attributes(filp, user_attr); case FITRIM: return exfat_ioctl_fitrim(inode, arg); default: return -ENOTTY; } } #ifdef CONFIG_COMPAT long exfat_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return exfat_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); } #endif int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; int err; err = __generic_file_fsync(filp, start, end, datasync); if (err) return err; err = sync_blockdev(inode->i_sb->s_bdev); if (err) return err; return blkdev_issue_flush(inode->i_sb->s_bdev); } static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end) { int err; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; const struct address_space_operations *ops = mapping->a_ops; while (start < end) { u32 zerofrom, len; struct page *page = NULL; zerofrom = start & (PAGE_SIZE - 1); len = PAGE_SIZE - zerofrom; if (start + len > end) len = end - start; err = ops->write_begin(file, mapping, start, len, &page, NULL); if (err) goto out; zero_user_segment(page, zerofrom, zerofrom + len); err = ops->write_end(file, mapping, start, len, len, page, NULL); if (err < 0) goto out; start += len; balance_dirty_pages_ratelimited(mapping); cond_resched(); } out: return err; } static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct exfat_inode_info *ei = EXFAT_I(inode); loff_t pos = iocb->ki_pos; loff_t valid_size; inode_lock(inode); valid_size = ei->valid_size; ret = generic_write_checks(iocb, iter); if (ret < 0) goto unlock; if (pos > valid_size) { ret = exfat_file_zeroed_range(file, valid_size, pos); if (ret < 0 && ret != -ENOSPC) { exfat_err(inode->i_sb, "write: fail to zero from %llu to %llu(%zd)", valid_size, pos, ret); } if (ret < 0) goto unlock; } ret = __generic_file_write_iter(iocb, iter); if (ret < 0) goto unlock; inode_unlock(inode); if (pos > valid_size) pos = valid_size; if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) { ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1, iocb->ki_flags & IOCB_SYNC); if (err < 0) return err; } return ret; unlock: inode_unlock(inode); return ret; } static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma) { int ret; struct inode *inode = file_inode(file); struct exfat_inode_info *ei = EXFAT_I(inode); loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT); loff_t end = min_t(loff_t, i_size_read(inode), start + vma->vm_end - vma->vm_start); if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) { ret = exfat_file_zeroed_range(file, ei->valid_size, end); if (ret < 0) { exfat_err(inode->i_sb, "mmap: fail to zero from %llu to %llu(%d)", start, end, ret); return ret; } } return generic_file_mmap(file, vma); } const struct file_operations exfat_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = exfat_file_write_iter, .unlocked_ioctl = exfat_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = exfat_compat_ioctl, #endif .mmap = exfat_file_mmap, .fsync = exfat_file_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, }; const struct inode_operations exfat_file_inode_operations = { .setattr = exfat_setattr, .getattr = exfat_getattr, }; |
93 93 93 87 93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-lshift.c - MPI helper functions * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" /* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left * and store the USIZE least significant digits of the result at WP. * Return the bits shifted out from the most significant digit. * * Argument constraints: * 1. 0 < CNT < BITS_PER_MP_LIMB * 2. If the result is to be written over the input, WP must be >= UP. */ mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt) { mpi_limb_t high_limb, low_limb; unsigned sh_1, sh_2; mpi_size_t i; mpi_limb_t retval; sh_1 = cnt; wp += 1; sh_2 = BITS_PER_MPI_LIMB - sh_1; i = usize - 1; low_limb = up[i]; retval = low_limb >> sh_2; high_limb = low_limb; while (--i >= 0) { low_limb = up[i]; wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); high_limb = low_limb; } wp[i] = high_limb << sh_1; return retval; } |
1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 | // SPDX-License-Identifier: GPL-2.0-only /* * IPV4 GSO/GRO offload support * Linux INET implementation * * Copyright (C) 2016 secunet Security Networks AG * Author: Steffen Klassert <steffen.klassert@secunet.com> * * ESP GRO support */ #include <linux/skbuff.h> #include <linux/init.h> #include <net/protocol.h> #include <crypto/aead.h> #include <crypto/authenc.h> #include <linux/err.h> #include <linux/module.h> #include <net/gro.h> #include <net/gso.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> #include <linux/scatterlist.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/udp.h> static struct sk_buff *esp4_gro_receive(struct list_head *head, struct sk_buff *skb) { int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; int encap_type = 0; __be32 seq; __be32 spi; if (!pskb_pull(skb, offset)) return NULL; if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0) goto out; xo = xfrm_offload(skb); if (!xo || !(xo->flags & CRYPTO_DONE)) { struct sec_path *sp = secpath_set(skb); if (!sp) goto out; if (sp->len == XFRM_MAX_DEPTH) goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) goto out_reset; skb->mark = xfrm_smark_get(skb->mark, x); sp->xvec[sp->len++] = x; sp->olen++; xo = xfrm_offload(skb); if (!xo) goto out_reset; } xo->flags |= XFRM_GRO; if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) encap_type = UDP_ENCAP_ESPINUDP; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); XFRM_SPI_SKB_CB(skb)->seq = seq; /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; return NULL; } static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; struct iphdr *iph = ip_hdr(skb); struct xfrm_offload *xo = xfrm_offload(skb); int proto = iph->protocol; skb_push(skb, -skb_network_offset(skb)); esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); xo->proto = proto; } static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP); return skb_eth_gso_segment(skb, features, type); } static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { const struct net_offload *ops; struct sk_buff *segs = ERR_PTR(-EINVAL); struct xfrm_offload *xo = xfrm_offload(skb); skb->transport_header += x->props.header_len; ops = rcu_dereference(inet_offloads[xo->proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); return segs; } static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; u8 proto = xo->proto; skb->transport_header += x->props.header_len; if (x->sel.family != AF_INET6) { if (proto == IPPROTO_BEETPH) { struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; skb->transport_header += ph->hdrlen * 8; proto = ph->nexthdr; } else { skb->transport_header -= IPV4_BEET_PHMAXLEN; } } else { __be16 frag; skb->transport_header += ipv6_skip_exthdr(skb, 0, &proto, &frag); if (proto == IPPROTO_TCP) skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } if (proto == IPPROTO_IPV6) skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); return segs; } static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { switch (x->outer_mode.encap) { case XFRM_MODE_TUNNEL: return xfrm4_tunnel_gso_segment(x, skb, features); case XFRM_MODE_TRANSPORT: return xfrm4_transport_gso_segment(x, skb, features); case XFRM_MODE_BEET: return xfrm4_beet_gso_segment(x, skb, features); } return ERR_PTR(-EOPNOTSUPP); } static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; if (!xo) return ERR_PTR(-EINVAL); if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) return ERR_PTR(-EINVAL); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); if (esph->spi != x->id.spi) return ERR_PTR(-EINVAL); if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) return ERR_PTR(-EINVAL); __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); skb->encap_hdr_csum = 1; if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~(NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; return xfrm4_outer_mode_gso_segment(x, skb, esp_features); } static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; struct xfrm_offload *xo = xfrm_offload(skb); if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) return -EINVAL; if (!(xo->flags & CRYPTO_DONE)) skb->ip_summed = CHECKSUM_NONE; return esp_input_done2(skb, 0); } static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { int err; int alen; int blksize; struct xfrm_offload *xo; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; __u32 seq; esp.inplace = true; xo = xfrm_offload(skb); if (!xo) return -EINVAL; if ((!(features & NETIF_F_HW_ESP) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } esp.proto = xo->proto; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); esp.tfclen = 0; /* XXX: Add support for tfc padding here. */ blksize = ALIGN(crypto_aead_blocksize(aead), 4); esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; esp.esph = ip_esp_hdr(skb); if (!hw_offload || !skb_is_gso(skb)) { esp.nfrags = esp_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; } seq = xo->seq.low; esph = esp.esph; esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; else xo->seq.low += skb_shinfo(skb)->gso_segs; } if (xo->seq.low < seq) xo->seq.hi++; esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); if (hw_offload) { if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) return -ENOMEM; xo = xfrm_offload(skb); if (!xo) return -EINVAL; xo->flags |= XFRM_XMIT; return 0; } err = esp_output_tail(x, skb, &esp); if (err) return err; secpath_reset(skb); if (skb_needs_linearize(skb, skb->dev->features) && __skb_linearize(skb)) return -ENOMEM; return 0; } static const struct net_offload esp4_offload = { .callbacks = { .gro_receive = esp4_gro_receive, .gso_segment = esp4_gso_segment, }, }; static const struct xfrm_type_offload esp_type_offload = { .owner = THIS_MODULE, .proto = IPPROTO_ESP, .input_tail = esp_input_tail, .xmit = esp_xmit, .encap = esp4_gso_encap, }; static int __init esp4_offload_init(void) { if (xfrm_register_type_offload(&esp_type_offload, AF_INET) < 0) { pr_info("%s: can't add xfrm type offload\n", __func__); return -EAGAIN; } return inet_add_offload(&esp4_offload, IPPROTO_ESP); } static void __exit esp4_offload_exit(void) { xfrm_unregister_type_offload(&esp_type_offload, AF_INET); inet_del_offload(&esp4_offload, IPPROTO_ESP); } module_init(esp4_offload_init); module_exit(esp4_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP); MODULE_DESCRIPTION("IPV4 GSO/GRO offload support"); |
61 9 52 6 6 6 6 3 2 2 3 2 2 1 1 11 12 3 9 2 1 1 3 2 1 1 9 1 1 2 1 2 2 3 3 3 3 9 9 4 3 2 6 6 3 2 1 3 6 8 2 6 4 7 3 13 13 12 2 10 71 8 63 1 2 12 2 3 8 9 6 12 4 9 26 2 16 18 7 11 46 2 3 6 28 5 2 35 18 4 9 30 5 66 5 2 9 51 15 42 15 42 9 48 62 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 | // SPDX-License-Identifier: GPL-2.0 /* * Quota code necessary even when VFS quota support is not compiled * into the kernel. The interesting stuff is over in dquot.c, here * we have symbols for initial quotactl(2) handling, the sysctl(2) * variables, etc - things needed even when quota support disabled. */ #include <linux/fs.h> #include <linux/namei.h> #include <linux/slab.h> #include <asm/current.h> #include <linux/blkdev.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/types.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/nospec.h> #include "compat.h" #include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) { switch (cmd) { /* these commands do not require any special privilegues */ case Q_GETFMT: case Q_SYNC: case Q_GETINFO: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XQUOTASYNC: break; /* allow to query information for dquots we "own" */ case Q_GETQUOTA: case Q_XGETQUOTA: if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) || (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id)))) break; fallthrough; default: if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return security_quotactl(cmd, type, id, sb); } static void quota_sync_one(struct super_block *sb, void *arg) { int type = *(int *)arg; if (sb->s_qcop && sb->s_qcop->quota_sync && (sb->s_quota_types & (1 << type))) sb->s_qcop->quota_sync(sb, type); } static int quota_sync_all(int type) { int ret; ret = security_quotactl(Q_SYNC, type, 0, NULL); if (!ret) iterate_supers(quota_sync_one, &type); return ret; } unsigned int qtype_enforce_flag(int type) { switch (type) { case USRQUOTA: return FS_QUOTA_UDQ_ENFD; case GRPQUOTA: return FS_QUOTA_GDQ_ENFD; case PRJQUOTA: return FS_QUOTA_PDQ_ENFD; } return 0; } static int quota_quotaon(struct super_block *sb, int type, qid_t id, const struct path *path) { if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) return -ENOSYS; if (sb->s_qcop->quota_enable) return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type)); if (IS_ERR(path)) return PTR_ERR(path); return sb->s_qcop->quota_on(sb, type, id, path); } static int quota_quotaoff(struct super_block *sb, int type) { if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable) return -ENOSYS; if (sb->s_qcop->quota_disable) return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type)); return sb->s_qcop->quota_off(sb, type); } static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; if (!sb_has_quota_active(sb, type)) return -ESRCH; fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; } static int quota_getinfo(struct super_block *sb, int type, void __user *addr) { struct qc_state state; struct qc_type_state *tstate; struct if_dqinfo uinfo; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = sb->s_qcop->get_state(sb, &state); if (ret) return ret; tstate = state.s_state + type; if (!(tstate->flags & QCI_ACCT_ENABLED)) return -ESRCH; memset(&uinfo, 0, sizeof(uinfo)); uinfo.dqi_bgrace = tstate->spc_timelimit; uinfo.dqi_igrace = tstate->ino_timelimit; if (tstate->flags & QCI_SYSFILE) uinfo.dqi_flags |= DQF_SYS_FILE; if (tstate->flags & QCI_ROOT_SQUASH) uinfo.dqi_flags |= DQF_ROOT_SQUASH; uinfo.dqi_valid = IIF_ALL; if (copy_to_user(addr, &uinfo, sizeof(uinfo))) return -EFAULT; return 0; } static int quota_setinfo(struct super_block *sb, int type, void __user *addr) { struct if_dqinfo info; struct qc_info qinfo; if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; if (!sb->s_qcop->set_info) return -ENOSYS; if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE)) return -EINVAL; memset(&qinfo, 0, sizeof(qinfo)); if (info.dqi_valid & IIF_FLAGS) { if (info.dqi_flags & ~DQF_SETINFO_MASK) return -EINVAL; if (info.dqi_flags & DQF_ROOT_SQUASH) qinfo.i_flags |= QCI_ROOT_SQUASH; qinfo.i_fieldmask |= QC_FLAGS; } if (info.dqi_valid & IIF_BGRACE) { qinfo.i_spc_timelimit = info.dqi_bgrace; qinfo.i_fieldmask |= QC_SPC_TIMER; } if (info.dqi_valid & IIF_IGRACE) { qinfo.i_ino_timelimit = info.dqi_igrace; qinfo.i_fieldmask |= QC_INO_TIMER; } return sb->s_qcop->set_info(sb, type, &qinfo); } static inline qsize_t qbtos(qsize_t blocks) { return blocks << QIF_DQBLKSIZE_BITS; } static inline qsize_t stoqb(qsize_t space) { return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; } static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src) { memset(dst, 0, sizeof(*dst)); dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit); dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit); dst->dqb_curspace = src->d_space; dst->dqb_ihardlimit = src->d_ino_hardlimit; dst->dqb_isoftlimit = src->d_ino_softlimit; dst->dqb_curinodes = src->d_ino_count; dst->dqb_btime = src->d_spc_timer; dst->dqb_itime = src->d_ino_timer; dst->dqb_valid = QIF_ALL; } static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_dqblk idq; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); if (ret) return ret; copy_to_if_dqblk(&idq, &fdq); if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_to_user(compat_dqblk, &idq, sizeof(*compat_dqblk))) return -EFAULT; if (put_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; } return 0; } /* * Return quota for next active quota >= this id, if any exists, * otherwise return -ENOENT via ->get_nextdqblk */ static int quota_getnextquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_nextdqblk idq; int ret; if (!sb->s_qcop->get_nextdqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq); if (ret) return ret; /* struct if_nextdqblk is a superset of struct if_dqblk */ copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq); idq.dqb_id = from_kqid(current_user_ns(), qid); if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; return 0; } static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit); dst->d_space = src->dqb_curspace; dst->d_ino_hardlimit = src->dqb_ihardlimit; dst->d_ino_softlimit = src->dqb_isoftlimit; dst->d_ino_count = src->dqb_curinodes; dst->d_spc_timer = src->dqb_btime; dst->d_ino_timer = src->dqb_itime; dst->d_fieldmask = 0; if (src->dqb_valid & QIF_BLIMITS) dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD; if (src->dqb_valid & QIF_SPACE) dst->d_fieldmask |= QC_SPACE; if (src->dqb_valid & QIF_ILIMITS) dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD; if (src->dqb_valid & QIF_INODES) dst->d_fieldmask |= QC_INO_COUNT; if (src->dqb_valid & QIF_BTIME) dst->d_fieldmask |= QC_SPC_TIMER; if (src->dqb_valid & QIF_ITIME) dst->d_fieldmask |= QC_INO_TIMER; } static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct qc_dqblk fdq; struct if_dqblk idq; struct kqid qid; if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_from_user(&idq, compat_dqblk, sizeof(*compat_dqblk)) || get_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; } if (!sb->s_qcop->set_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; copy_from_if_dqblk(&fdq, &idq); return sb->s_qcop->set_dqblk(sb, qid, &fdq); } static int quota_enable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_enable) return -ENOSYS; return sb->s_qcop->quota_enable(sb, flags); } static int quota_disable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_disable) return -ENOSYS; return sb->s_qcop->quota_disable(sb, flags); } static int quota_state_to_flags(struct qc_state *state) { int flags = 0; if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_UDQ_ACCT; if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_UDQ_ENFD; if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_GDQ_ACCT; if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_GDQ_ENFD; if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_PDQ_ACCT; if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_PDQ_ENFD; return flags; } static int quota_getstate(struct super_block *sb, int type, struct fs_quota_stat *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? */ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { /* * Q_XGETQSTAT doesn't have room for both group and project * quotas. So, allow the project quota values to be copied out * only if there is no group quota information available. */ if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) { fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } } return 0; } static int compat_copy_fs_qfilestat(struct compat_fs_qfilestat __user *to, struct fs_qfilestat *from) { if (copy_to_user(to, from, sizeof(*to)) || put_user(from->qfs_nextents, &to->qfs_nextents)) return -EFAULT; return 0; } static int compat_copy_fs_quota_stat(struct compat_fs_quota_stat __user *to, struct fs_quota_stat *from) { if (put_user(from->qs_version, &to->qs_version) || put_user(from->qs_flags, &to->qs_flags) || put_user(from->qs_pad, &to->qs_pad) || compat_copy_fs_qfilestat(&to->qs_uquota, &from->qs_uquota) || compat_copy_fs_qfilestat(&to->qs_gquota, &from->qs_gquota) || put_user(from->qs_incoredqs, &to->qs_incoredqs) || put_user(from->qs_btimelimit, &to->qs_btimelimit) || put_user(from->qs_itimelimit, &to->qs_itimelimit) || put_user(from->qs_rtbtimelimit, &to->qs_rtbtimelimit) || put_user(from->qs_bwarnlimit, &to->qs_bwarnlimit) || put_user(from->qs_iwarnlimit, &to->qs_iwarnlimit)) return -EFAULT; return 0; } static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = quota_getstate(sb, type, &fqs); if (ret) return ret; if (compat_need_64bit_alignment_fixup()) return compat_copy_fs_quota_stat(addr, &fqs); if (copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return 0; } static int quota_getstatev(struct super_block *sb, int type, struct fs_quota_statv *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? */ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; fqs->qs_rtbwarnlimit = state.s_state[type].rt_spc_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } return 0; } static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; memset(&fqs, 0, sizeof(fqs)); if (copy_from_user(&fqs, addr, 1)) /* Just read qs_version */ return -EFAULT; /* If this kernel doesn't support user specified version, fail */ switch (fqs.qs_version) { case FS_QSTATV_VERSION1: break; default: return -EINVAL; } ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } /* * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them * out of there as xfsprogs rely on definitions being in that header file. So * just define same functions here for quota purposes. */ #define XFS_BB_SHIFT 9 static inline u64 quota_bbtob(u64 blocks) { return blocks << XFS_BB_SHIFT; } static inline u64 quota_btobb(u64 bytes) { return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT; } static inline s64 copy_from_xfs_dqblk_ts(const struct fs_disk_quota *d, __s32 timer, __s8 timer_hi) { if (d->d_fieldmask & FS_DQ_BIGTIME) return (u32)timer | (s64)timer_hi << 32; return timer; } static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src) { dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit); dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit); dst->d_ino_hardlimit = src->d_ino_hardlimit; dst->d_ino_softlimit = src->d_ino_softlimit; dst->d_space = quota_bbtob(src->d_bcount); dst->d_ino_count = src->d_icount; dst->d_ino_timer = copy_from_xfs_dqblk_ts(src, src->d_itimer, src->d_itimer_hi); dst->d_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_btimer, src->d_btimer_hi); dst->d_ino_warns = src->d_iwarns; dst->d_spc_warns = src->d_bwarns; dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit); dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit); dst->d_rt_space = quota_bbtob(src->d_rtbcount); dst->d_rt_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_rtbtimer, src->d_rtbtimer_hi); dst->d_rt_spc_warns = src->d_rtbwarns; dst->d_fieldmask = 0; if (src->d_fieldmask & FS_DQ_ISOFT) dst->d_fieldmask |= QC_INO_SOFT; if (src->d_fieldmask & FS_DQ_IHARD) dst->d_fieldmask |= QC_INO_HARD; if (src->d_fieldmask & FS_DQ_BSOFT) dst->d_fieldmask |= QC_SPC_SOFT; if (src->d_fieldmask & FS_DQ_BHARD) dst->d_fieldmask |= QC_SPC_HARD; if (src->d_fieldmask & FS_DQ_RTBSOFT) dst->d_fieldmask |= QC_RT_SPC_SOFT; if (src->d_fieldmask & FS_DQ_RTBHARD) dst->d_fieldmask |= QC_RT_SPC_HARD; if (src->d_fieldmask & FS_DQ_BTIMER) dst->d_fieldmask |= QC_SPC_TIMER; if (src->d_fieldmask & FS_DQ_ITIMER) dst->d_fieldmask |= QC_INO_TIMER; if (src->d_fieldmask & FS_DQ_RTBTIMER) dst->d_fieldmask |= QC_RT_SPC_TIMER; if (src->d_fieldmask & FS_DQ_BWARNS) dst->d_fieldmask |= QC_SPC_WARNS; if (src->d_fieldmask & FS_DQ_IWARNS) dst->d_fieldmask |= QC_INO_WARNS; if (src->d_fieldmask & FS_DQ_RTBWARNS) dst->d_fieldmask |= QC_RT_SPC_WARNS; if (src->d_fieldmask & FS_DQ_BCOUNT) dst->d_fieldmask |= QC_SPACE; if (src->d_fieldmask & FS_DQ_ICOUNT) dst->d_fieldmask |= QC_INO_COUNT; if (src->d_fieldmask & FS_DQ_RTBCOUNT) dst->d_fieldmask |= QC_RT_SPACE; } static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst, struct fs_disk_quota *src) { memset(dst, 0, sizeof(*dst)); dst->i_spc_timelimit = src->d_btimer; dst->i_ino_timelimit = src->d_itimer; dst->i_rt_spc_timelimit = src->d_rtbtimer; dst->i_ino_warnlimit = src->d_iwarns; dst->i_spc_warnlimit = src->d_bwarns; dst->i_rt_spc_warnlimit = src->d_rtbwarns; if (src->d_fieldmask & FS_DQ_BWARNS) dst->i_fieldmask |= QC_SPC_WARNS; if (src->d_fieldmask & FS_DQ_IWARNS) dst->i_fieldmask |= QC_INO_WARNS; if (src->d_fieldmask & FS_DQ_RTBWARNS) dst->i_fieldmask |= QC_RT_SPC_WARNS; if (src->d_fieldmask & FS_DQ_BTIMER) dst->i_fieldmask |= QC_SPC_TIMER; if (src->d_fieldmask & FS_DQ_ITIMER) dst->i_fieldmask |= QC_INO_TIMER; if (src->d_fieldmask & FS_DQ_RTBTIMER) dst->i_fieldmask |= QC_RT_SPC_TIMER; } static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; if (!sb->s_qcop->set_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; /* Are we actually setting timer / warning limits for all users? */ if (from_kqid(sb->s_user_ns, qid) == 0 && fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) { struct qc_info qinfo; int ret; if (!sb->s_qcop->set_info) return -EINVAL; copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq); ret = sb->s_qcop->set_info(sb, type, &qinfo); if (ret) return ret; /* These are already done */ fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK); } copy_from_xfs_dqblk(&qdq, &fdq); return sb->s_qcop->set_dqblk(sb, qid, &qdq); } static inline void copy_to_xfs_dqblk_ts(const struct fs_disk_quota *d, __s32 *timer_lo, __s8 *timer_hi, s64 timer) { *timer_lo = timer; if (d->d_fieldmask & FS_DQ_BIGTIME) *timer_hi = timer >> 32; } static inline bool want_bigtime(s64 timer) { return timer > S32_MAX || timer < S32_MIN; } static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src, int type, qid_t id) { memset(dst, 0, sizeof(*dst)); if (want_bigtime(src->d_ino_timer) || want_bigtime(src->d_spc_timer) || want_bigtime(src->d_rt_spc_timer)) dst->d_fieldmask |= FS_DQ_BIGTIME; dst->d_version = FS_DQUOT_VERSION; dst->d_id = id; if (type == USRQUOTA) dst->d_flags = FS_USER_QUOTA; else if (type == PRJQUOTA) dst->d_flags = FS_PROJ_QUOTA; else dst->d_flags = FS_GROUP_QUOTA; dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit); dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit); dst->d_ino_hardlimit = src->d_ino_hardlimit; dst->d_ino_softlimit = src->d_ino_softlimit; dst->d_bcount = quota_btobb(src->d_space); dst->d_icount = src->d_ino_count; copy_to_xfs_dqblk_ts(dst, &dst->d_itimer, &dst->d_itimer_hi, src->d_ino_timer); copy_to_xfs_dqblk_ts(dst, &dst->d_btimer, &dst->d_btimer_hi, src->d_spc_timer); dst->d_iwarns = src->d_ino_warns; dst->d_bwarns = src->d_spc_warns; dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit); dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit); dst->d_rtbcount = quota_btobb(src->d_rt_space); copy_to_xfs_dqblk_ts(dst, &dst->d_rtbtimer, &dst->d_rtbtimer_hi, src->d_rt_spc_timer); dst->d_rtbwarns = src->d_rt_spc_warns; } static int quota_getxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_dqblk(sb, qid, &qdq); if (ret) return ret; copy_to_xfs_dqblk(&fdq, &qdq, type, id); if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } /* * Return quota for next active quota >= this id, if any exists, * otherwise return -ENOENT via ->get_nextdqblk. */ static int quota_getnextxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; struct qc_dqblk qdq; struct kqid qid; qid_t id_out; int ret; if (!sb->s_qcop->get_nextdqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq); if (ret) return ret; id_out = from_kqid(current_user_ns(), qid); copy_to_xfs_dqblk(&fdq, &qdq, type, id_out); if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } static int quota_rmxquota(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->rm_xquota) return -ENOSYS; return sb->s_qcop->rm_xquota(sb, flags); } /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr, const struct path *path) { int ret; type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. */ if (!sb->s_qcop) return -ENOSYS; if (!(sb->s_quota_types & (1 << type))) return -EINVAL; ret = check_quotactl_permission(sb, type, cmd, id); if (ret < 0) return ret; switch (cmd) { case Q_QUOTAON: return quota_quotaon(sb, type, id, path); case Q_QUOTAOFF: return quota_quotaoff(sb, type); case Q_GETFMT: return quota_getfmt(sb, type, addr); case Q_GETINFO: return quota_getinfo(sb, type, addr); case Q_SETINFO: return quota_setinfo(sb, type, addr); case Q_GETQUOTA: return quota_getquota(sb, type, id, addr); case Q_GETNEXTQUOTA: return quota_getnextquota(sb, type, id, addr); case Q_SETQUOTA: return quota_setquota(sb, type, id, addr); case Q_SYNC: if (!sb->s_qcop->quota_sync) return -ENOSYS; return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: return quota_enable(sb, addr); case Q_XQUOTAOFF: return quota_disable(sb, addr); case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: return quota_getxstate(sb, type, addr); case Q_XGETQSTATV: return quota_getxstatev(sb, type, addr); case Q_XSETQLIM: return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); case Q_XGETNEXTQUOTA: return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: if (sb_rdonly(sb)) return -EROFS; /* XFS quotas are fully coherent now, making this call a noop */ return 0; default: return -EINVAL; } } /* Return 1 if 'cmd' will block on frozen filesystem */ static int quotactl_cmd_write(int cmd) { /* * We cannot allow Q_GETQUOTA and Q_GETNEXTQUOTA without write access * as dquot_acquire() may allocate space for new structure and OCFS2 * needs to increment on-disk use count. */ switch (cmd) { case Q_GETFMT: case Q_GETINFO: case Q_SYNC: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XGETQUOTA: case Q_XGETNEXTQUOTA: case Q_XQUOTASYNC: return 0; } return 1; } /* Return true if quotactl command is manipulating quota on/off state */ static bool quotactl_cmd_onoff(int cmd) { return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) || (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF); } /* * look up a superblock on which quota ops will be performed * - use the name of a block device to find the superblock thereon */ static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK struct super_block *sb; struct filename *tmp = getname(special); bool excl = false, thawed = false; int error; dev_t dev; if (IS_ERR(tmp)) return ERR_CAST(tmp); error = lookup_bdev(tmp->name, &dev); putname(tmp); if (error) return ERR_PTR(error); if (quotactl_cmd_onoff(cmd)) { excl = true; thawed = true; } else if (quotactl_cmd_write(cmd)) { thawed = true; } retry: sb = user_get_super(dev, excl); if (!sb) return ERR_PTR(-ENODEV); if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { if (excl) up_write(&sb->s_umount); else up_read(&sb->s_umount); /* Wait for sb to unfreeze */ sb_start_write(sb); sb_end_write(sb); put_super(sb); goto retry; } return sb; #else return ERR_PTR(-ENODEV); #endif } /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota * calls. Maybe we need to add the process quotas etc. in the future, * but we probably should use rlimits for that. */ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr) { uint cmds, type; struct super_block *sb = NULL; struct path path, *pathp = NULL; int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if (type >= MAXQUOTAS) return -EINVAL; /* * As a special case Q_SYNC can be called without a specific device. * It will iterate all superblocks that have quota enabled and call * the sync action on each of them. */ if (!special) { if (cmds == Q_SYNC) return quota_sync_all(type); return -ENODEV; } /* * Path for quotaon has to be resolved before grabbing superblock * because that gets s_umount sem which is also possibly needed by path * resolution (think about autofs) and thus deadlocks could arise. */ if (cmds == Q_QUOTAON) { ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); if (ret) pathp = ERR_PTR(ret); else pathp = &path; } sb = quotactl_block(special, cmds); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto out; } ret = do_quotactl(sb, type, cmds, id, addr, pathp); if (!quotactl_cmd_onoff(cmds)) drop_super(sb); else drop_super_exclusive(sb); out: if (pathp && !IS_ERR(pathp)) path_put(pathp); return ret; } SYSCALL_DEFINE4(quotactl_fd, unsigned int, fd, unsigned int, cmd, qid_t, id, void __user *, addr) { struct super_block *sb; unsigned int cmds = cmd >> SUBCMDSHIFT; unsigned int type = cmd & SUBCMDMASK; struct fd f; int ret; f = fdget_raw(fd); if (!f.file) return -EBADF; ret = -EINVAL; if (type >= MAXQUOTAS) goto out; if (quotactl_cmd_write(cmds)) { ret = mnt_want_write(f.file->f_path.mnt); if (ret) goto out; } sb = f.file->f_path.mnt->mnt_sb; if (quotactl_cmd_onoff(cmds)) down_write(&sb->s_umount); else down_read(&sb->s_umount); ret = do_quotactl(sb, type, cmds, id, addr, ERR_PTR(-EINVAL)); if (quotactl_cmd_onoff(cmds)) up_write(&sb->s_umount); else up_read(&sb->s_umount); if (quotactl_cmd_write(cmds)) mnt_drop_write(f.file->f_path.mnt); out: fdput(f); return ret; } |
1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 | // SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * rtl871x_mlme.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved. * * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _RTL871X_MLME_C_ #include <linux/etherdevice.h> #include "osdep_service.h" #include "drv_types.h" #include "recv_osdep.h" #include "xmit_osdep.h" #include "mlme_osdep.h" #include "sta_info.h" #include "wifi.h" #include "wlan_bssdef.h" static void update_ht_cap(struct _adapter *padapter, u8 *pie, uint ie_len); int r8712_init_mlme_priv(struct _adapter *padapter) { sint i; u8 *pbuf; struct wlan_network *pnetwork; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; memset((u8 *)pmlmepriv, 0, sizeof(struct mlme_priv)); pmlmepriv->nic_hdl = (u8 *)padapter; pmlmepriv->pscanned = NULL; pmlmepriv->fw_state = 0; pmlmepriv->cur_network.network.InfrastructureMode = Ndis802_11AutoUnknown; /* Maybe someday we should rename this variable to "active_mode"(Jeff)*/ pmlmepriv->passive_mode = 1; /* 1: active, 0: passive. */ spin_lock_init(&(pmlmepriv->lock)); spin_lock_init(&(pmlmepriv->lock2)); _init_queue(&(pmlmepriv->free_bss_pool)); _init_queue(&(pmlmepriv->scanned_queue)); set_scanned_network_val(pmlmepriv, 0); memset(&pmlmepriv->assoc_ssid, 0, sizeof(struct ndis_802_11_ssid)); pbuf = kmalloc_array(MAX_BSS_CNT, sizeof(struct wlan_network), GFP_ATOMIC); if (!pbuf) return -ENOMEM; pmlmepriv->free_bss_buf = pbuf; pnetwork = (struct wlan_network *)pbuf; for (i = 0; i < MAX_BSS_CNT; i++) { INIT_LIST_HEAD(&(pnetwork->list)); list_add_tail(&(pnetwork->list), &(pmlmepriv->free_bss_pool.queue)); pnetwork++; } pmlmepriv->sitesurveyctrl.last_rx_pkts = 0; pmlmepriv->sitesurveyctrl.last_tx_pkts = 0; pmlmepriv->sitesurveyctrl.traffic_busy = false; /* allocate DMA-able/Non-Page memory for cmd_buf and rsp_buf */ r8712_init_mlme_timer(padapter); return 0; } struct wlan_network *_r8712_alloc_network(struct mlme_priv *pmlmepriv) { unsigned long irqL; struct wlan_network *pnetwork; struct __queue *free_queue = &pmlmepriv->free_bss_pool; spin_lock_irqsave(&free_queue->lock, irqL); pnetwork = list_first_entry_or_null(&free_queue->queue, struct wlan_network, list); if (pnetwork) { list_del_init(&pnetwork->list); pnetwork->last_scanned = jiffies; pmlmepriv->num_of_scanned++; } spin_unlock_irqrestore(&free_queue->lock, irqL); return pnetwork; } static void _free_network(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwork) { u32 curr_time, delta_time; unsigned long irqL; struct __queue *free_queue = &(pmlmepriv->free_bss_pool); if (!pnetwork) return; if (pnetwork->fixed) return; curr_time = jiffies; delta_time = (curr_time - (u32)pnetwork->last_scanned) / HZ; if (delta_time < SCANQUEUE_LIFETIME) return; spin_lock_irqsave(&free_queue->lock, irqL); list_del_init(&pnetwork->list); list_add_tail(&pnetwork->list, &free_queue->queue); pmlmepriv->num_of_scanned--; spin_unlock_irqrestore(&free_queue->lock, irqL); } static void free_network_nolock(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwork) { struct __queue *free_queue = &pmlmepriv->free_bss_pool; if (!pnetwork) return; if (pnetwork->fixed) return; list_del_init(&pnetwork->list); list_add_tail(&pnetwork->list, &free_queue->queue); pmlmepriv->num_of_scanned--; } /* return the wlan_network with the matching addr * Shall be called under atomic context... * to avoid possible racing condition... */ static struct wlan_network *r8712_find_network(struct __queue *scanned_queue, u8 *addr) { unsigned long irqL; struct list_head *phead, *plist; struct wlan_network *pnetwork = NULL; if (is_zero_ether_addr(addr)) return NULL; spin_lock_irqsave(&scanned_queue->lock, irqL); phead = &scanned_queue->queue; list_for_each(plist, phead) { pnetwork = list_entry(plist, struct wlan_network, list); if (!memcmp(addr, pnetwork->network.MacAddress, ETH_ALEN)) break; } if (plist == phead) pnetwork = NULL; spin_unlock_irqrestore(&scanned_queue->lock, irqL); return pnetwork; } void r8712_free_network_queue(struct _adapter *padapter) { unsigned long irqL; struct list_head *phead, *plist; struct wlan_network *pnetwork; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct __queue *scanned_queue = &pmlmepriv->scanned_queue; spin_lock_irqsave(&scanned_queue->lock, irqL); phead = &scanned_queue->queue; plist = phead->next; while (!end_of_queue_search(phead, plist)) { pnetwork = container_of(plist, struct wlan_network, list); plist = plist->next; _free_network(pmlmepriv, pnetwork); } spin_unlock_irqrestore(&scanned_queue->lock, irqL); } sint r8712_if_up(struct _adapter *padapter) { sint res; if (padapter->driver_stopped || padapter->surprise_removed || !check_fwstate(&padapter->mlmepriv, _FW_LINKED)) { res = false; } else { res = true; } return res; } void r8712_generate_random_ibss(u8 *pibss) { u32 curtime = jiffies; pibss[0] = 0x02; /*in ad-hoc mode bit1 must set to 1 */ pibss[1] = 0x11; pibss[2] = 0x87; pibss[3] = (u8)(curtime & 0xff); pibss[4] = (u8)((curtime >> 8) & 0xff); pibss[5] = (u8)((curtime >> 16) & 0xff); } uint r8712_get_wlan_bssid_ex_sz(struct wlan_bssid_ex *bss) { return sizeof(*bss) + bss->IELength - MAX_IE_SZ; } u8 *r8712_get_capability_from_ie(u8 *ie) { return ie + 8 + 2; } void r8712_free_mlme_priv(struct mlme_priv *pmlmepriv) { kfree(pmlmepriv->free_bss_buf); } static struct wlan_network *alloc_network(struct mlme_priv *pmlmepriv) { return _r8712_alloc_network(pmlmepriv); } int r8712_is_same_ibss(struct _adapter *adapter, struct wlan_network *pnetwork) { int ret = true; struct security_priv *psecuritypriv = &adapter->securitypriv; if ((psecuritypriv->PrivacyAlgrthm != _NO_PRIVACY_) && (pnetwork->network.Privacy == cpu_to_le32(0))) ret = false; else if ((psecuritypriv->PrivacyAlgrthm == _NO_PRIVACY_) && (pnetwork->network.Privacy == cpu_to_le32(1))) ret = false; else ret = true; return ret; } static int is_same_network(struct wlan_bssid_ex *src, struct wlan_bssid_ex *dst) { u16 s_cap, d_cap; memcpy((u8 *)&s_cap, r8712_get_capability_from_ie(src->IEs), 2); memcpy((u8 *)&d_cap, r8712_get_capability_from_ie(dst->IEs), 2); return (src->Ssid.SsidLength == dst->Ssid.SsidLength) && (src->Configuration.DSConfig == dst->Configuration.DSConfig) && ((!memcmp(src->MacAddress, dst->MacAddress, ETH_ALEN))) && ((!memcmp(src->Ssid.Ssid, dst->Ssid.Ssid, src->Ssid.SsidLength))) && ((s_cap & WLAN_CAPABILITY_IBSS) == (d_cap & WLAN_CAPABILITY_IBSS)) && ((s_cap & WLAN_CAPABILITY_ESS) == (d_cap & WLAN_CAPABILITY_ESS)); } struct wlan_network *r8712_get_oldest_wlan_network( struct __queue *scanned_queue) { struct list_head *plist, *phead; struct wlan_network *pwlan = NULL; struct wlan_network *oldest = NULL; phead = &scanned_queue->queue; plist = phead->next; while (1) { if (end_of_queue_search(phead, plist)) break; pwlan = container_of(plist, struct wlan_network, list); if (!pwlan->fixed) { if (!oldest || time_after((unsigned long)oldest->last_scanned, (unsigned long)pwlan->last_scanned)) oldest = pwlan; } plist = plist->next; } return oldest; } static void update_network(struct wlan_bssid_ex *dst, struct wlan_bssid_ex *src, struct _adapter *padapter) { u32 last_evm = 0, tmpVal; struct smooth_rssi_data *sqd = &padapter->recvpriv.signal_qual_data; if (check_fwstate(&padapter->mlmepriv, _FW_LINKED) && is_same_network(&(padapter->mlmepriv.cur_network.network), src)) { if (padapter->recvpriv.signal_qual_data.total_num++ >= PHY_LINKQUALITY_SLID_WIN_MAX) { padapter->recvpriv.signal_qual_data.total_num = PHY_LINKQUALITY_SLID_WIN_MAX; last_evm = sqd->elements[sqd->index]; padapter->recvpriv.signal_qual_data.total_val -= last_evm; } padapter->recvpriv.signal_qual_data.total_val += src->Rssi; sqd->elements[sqd->index++] = src->Rssi; if (padapter->recvpriv.signal_qual_data.index >= PHY_LINKQUALITY_SLID_WIN_MAX) padapter->recvpriv.signal_qual_data.index = 0; /* <1> Showed on UI for user, in percentage. */ tmpVal = padapter->recvpriv.signal_qual_data.total_val / padapter->recvpriv.signal_qual_data.total_num; padapter->recvpriv.signal = (u8)tmpVal; src->Rssi = padapter->recvpriv.signal; } else { src->Rssi = (src->Rssi + dst->Rssi) / 2; } memcpy((u8 *)dst, (u8 *)src, r8712_get_wlan_bssid_ex_sz(src)); } static void update_current_network(struct _adapter *adapter, struct wlan_bssid_ex *pnetwork) { struct mlme_priv *pmlmepriv = &adapter->mlmepriv; if (is_same_network(&(pmlmepriv->cur_network.network), pnetwork)) { update_network(&(pmlmepriv->cur_network.network), pnetwork, adapter); r8712_update_protection(adapter, (pmlmepriv->cur_network.network.IEs) + sizeof(struct NDIS_802_11_FIXED_IEs), pmlmepriv->cur_network.network.IELength); } } /* Caller must hold pmlmepriv->lock first */ static void update_scanned_network(struct _adapter *adapter, struct wlan_bssid_ex *target) { struct list_head *plist, *phead; u32 bssid_ex_sz; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct __queue *queue = &pmlmepriv->scanned_queue; struct wlan_network *pnetwork = NULL; struct wlan_network *oldest = NULL; phead = &queue->queue; plist = phead->next; while (1) { if (end_of_queue_search(phead, plist)) break; pnetwork = container_of(plist, struct wlan_network, list); if (is_same_network(&pnetwork->network, target)) break; if ((oldest == ((struct wlan_network *)0)) || time_after((unsigned long)oldest->last_scanned, (unsigned long)pnetwork->last_scanned)) oldest = pnetwork; plist = plist->next; } /* If we didn't find a match, then get a new network slot to initialize * with this beacon's information */ if (end_of_queue_search(phead, plist)) { if (list_empty(&pmlmepriv->free_bss_pool.queue)) { /* If there are no more slots, expire the oldest */ pnetwork = oldest; target->Rssi = (pnetwork->network.Rssi + target->Rssi) / 2; memcpy(&pnetwork->network, target, r8712_get_wlan_bssid_ex_sz(target)); pnetwork->last_scanned = jiffies; } else { /* Otherwise just pull from the free list */ /* update scan_time */ pnetwork = alloc_network(pmlmepriv); if (!pnetwork) return; bssid_ex_sz = r8712_get_wlan_bssid_ex_sz(target); target->Length = bssid_ex_sz; memcpy(&pnetwork->network, target, bssid_ex_sz); list_add_tail(&pnetwork->list, &queue->queue); } } else { /* we have an entry and we are going to update it. But * this entry may be already expired. In this case we * do the same as we found a new net and call the new_net * handler */ update_network(&pnetwork->network, target, adapter); pnetwork->last_scanned = jiffies; } } static void rtl8711_add_network(struct _adapter *adapter, struct wlan_bssid_ex *pnetwork) { unsigned long irqL; struct mlme_priv *pmlmepriv = &(((struct _adapter *)adapter)->mlmepriv); struct __queue *queue = &pmlmepriv->scanned_queue; spin_lock_irqsave(&queue->lock, irqL); update_current_network(adapter, pnetwork); update_scanned_network(adapter, pnetwork); spin_unlock_irqrestore(&queue->lock, irqL); } /*select the desired network based on the capability of the (i)bss. * check items: (1) security * (2) network_type * (3) WMM * (4) HT * (5) others */ static int is_desired_network(struct _adapter *adapter, struct wlan_network *pnetwork) { u8 wps_ie[512]; uint wps_ielen; int bselected = true; struct security_priv *psecuritypriv = &adapter->securitypriv; if (psecuritypriv->wps_phase) { if (r8712_get_wps_ie(pnetwork->network.IEs, pnetwork->network.IELength, wps_ie, &wps_ielen)) return true; return false; } if ((psecuritypriv->PrivacyAlgrthm != _NO_PRIVACY_) && (pnetwork->network.Privacy == 0)) bselected = false; if (check_fwstate(&adapter->mlmepriv, WIFI_ADHOC_STATE)) { if (pnetwork->network.InfrastructureMode != adapter->mlmepriv.cur_network.network.InfrastructureMode) bselected = false; } return bselected; } /* TODO: Perry : For Power Management */ void r8712_atimdone_event_callback(struct _adapter *adapter, u8 *pbuf) { } void r8712_survey_event_callback(struct _adapter *adapter, u8 *pbuf) { unsigned long flags; u32 len; struct wlan_bssid_ex *pnetwork; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; pnetwork = (struct wlan_bssid_ex *)pbuf; #ifdef __BIG_ENDIAN /* endian_convert */ pnetwork->Length = le32_to_cpu(pnetwork->Length); pnetwork->Ssid.SsidLength = le32_to_cpu(pnetwork->Ssid.SsidLength); pnetwork->Privacy = le32_to_cpu(pnetwork->Privacy); pnetwork->Rssi = le32_to_cpu(pnetwork->Rssi); pnetwork->NetworkTypeInUse = le32_to_cpu(pnetwork->NetworkTypeInUse); pnetwork->Configuration.ATIMWindow = le32_to_cpu(pnetwork->Configuration.ATIMWindow); pnetwork->Configuration.BeaconPeriod = le32_to_cpu(pnetwork->Configuration.BeaconPeriod); pnetwork->Configuration.DSConfig = le32_to_cpu(pnetwork->Configuration.DSConfig); pnetwork->Configuration.FHConfig.DwellTime = le32_to_cpu(pnetwork->Configuration.FHConfig.DwellTime); pnetwork->Configuration.FHConfig.HopPattern = le32_to_cpu(pnetwork->Configuration.FHConfig.HopPattern); pnetwork->Configuration.FHConfig.HopSet = le32_to_cpu(pnetwork->Configuration.FHConfig.HopSet); pnetwork->Configuration.FHConfig.Length = le32_to_cpu(pnetwork->Configuration.FHConfig.Length); pnetwork->Configuration.Length = le32_to_cpu(pnetwork->Configuration.Length); pnetwork->InfrastructureMode = le32_to_cpu(pnetwork->InfrastructureMode); pnetwork->IELength = le32_to_cpu(pnetwork->IELength); #endif len = r8712_get_wlan_bssid_ex_sz(pnetwork); if (len > sizeof(struct wlan_bssid_ex)) return; spin_lock_irqsave(&pmlmepriv->lock2, flags); /* update IBSS_network 's timestamp */ if (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)) { if (!memcmp(&(pmlmepriv->cur_network.network.MacAddress), pnetwork->MacAddress, ETH_ALEN)) { struct wlan_network *ibss_wlan = NULL; memcpy(pmlmepriv->cur_network.network.IEs, pnetwork->IEs, 8); ibss_wlan = r8712_find_network( &pmlmepriv->scanned_queue, pnetwork->MacAddress); if (ibss_wlan) { memcpy(ibss_wlan->network.IEs, pnetwork->IEs, 8); goto exit; } } } /* lock pmlmepriv->lock when you accessing network_q */ if (!check_fwstate(pmlmepriv, _FW_UNDER_LINKING)) { if (pnetwork->Ssid.Ssid[0] != 0) { rtl8711_add_network(adapter, pnetwork); } else { pnetwork->Ssid.SsidLength = 8; memcpy(pnetwork->Ssid.Ssid, "<hidden>", 8); rtl8711_add_network(adapter, pnetwork); } } exit: spin_unlock_irqrestore(&pmlmepriv->lock2, flags); } void r8712_surveydone_event_callback(struct _adapter *adapter, u8 *pbuf) { unsigned long irqL; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; spin_lock_irqsave(&pmlmepriv->lock, irqL); if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY)) { del_timer(&pmlmepriv->scan_to_timer); _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY); } if (pmlmepriv->to_join) { if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)) { if (!check_fwstate(pmlmepriv, _FW_LINKED)) { set_fwstate(pmlmepriv, _FW_UNDER_LINKING); if (!r8712_select_and_join_from_scan(pmlmepriv)) { mod_timer(&pmlmepriv->assoc_timer, jiffies + msecs_to_jiffies(MAX_JOIN_TIMEOUT)); } else { struct wlan_bssid_ex *pdev_network = &(adapter->registrypriv.dev_network); u8 *pibss = adapter->registrypriv.dev_network.MacAddress; pmlmepriv->fw_state ^= _FW_UNDER_SURVEY; memcpy(&pdev_network->Ssid, &pmlmepriv->assoc_ssid, sizeof(struct ndis_802_11_ssid)); r8712_update_registrypriv_dev_network (adapter); r8712_generate_random_ibss(pibss); pmlmepriv->fw_state = WIFI_ADHOC_MASTER_STATE; pmlmepriv->to_join = false; } } } else { pmlmepriv->to_join = false; set_fwstate(pmlmepriv, _FW_UNDER_LINKING); if (!r8712_select_and_join_from_scan(pmlmepriv)) mod_timer(&pmlmepriv->assoc_timer, jiffies + msecs_to_jiffies(MAX_JOIN_TIMEOUT)); else _clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING); } } spin_unlock_irqrestore(&pmlmepriv->lock, irqL); } /* *r8712_free_assoc_resources: the caller has to lock pmlmepriv->lock */ void r8712_free_assoc_resources(struct _adapter *adapter) { unsigned long irqL; struct wlan_network *pwlan = NULL; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct sta_priv *pstapriv = &adapter->stapriv; struct wlan_network *tgt_network = &pmlmepriv->cur_network; pwlan = r8712_find_network(&pmlmepriv->scanned_queue, tgt_network->network.MacAddress); if (check_fwstate(pmlmepriv, WIFI_STATION_STATE | WIFI_AP_STATE)) { struct sta_info *psta; psta = r8712_get_stainfo(&adapter->stapriv, tgt_network->network.MacAddress); spin_lock_irqsave(&pstapriv->sta_hash_lock, irqL); r8712_free_stainfo(adapter, psta); spin_unlock_irqrestore(&pstapriv->sta_hash_lock, irqL); } if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE | WIFI_ADHOC_MASTER_STATE | WIFI_AP_STATE)) r8712_free_all_stainfo(adapter); if (pwlan) pwlan->fixed = false; if (((check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)) && (adapter->stapriv.asoc_sta_count == 1))) free_network_nolock(pmlmepriv, pwlan); } /* * r8712_indicate_connect: the caller has to lock pmlmepriv->lock */ void r8712_indicate_connect(struct _adapter *padapter) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; pmlmepriv->to_join = false; set_fwstate(pmlmepriv, _FW_LINKED); padapter->ledpriv.LedControlHandler(padapter, LED_CTL_LINK); r8712_os_indicate_connect(padapter); if (padapter->registrypriv.power_mgnt > PS_MODE_ACTIVE) mod_timer(&pmlmepriv->dhcp_timer, jiffies + msecs_to_jiffies(60000)); } /* * r8712_ind_disconnect: the caller has to lock pmlmepriv->lock */ void r8712_ind_disconnect(struct _adapter *padapter) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; if (check_fwstate(pmlmepriv, _FW_LINKED)) { _clr_fwstate_(pmlmepriv, _FW_LINKED); padapter->ledpriv.LedControlHandler(padapter, LED_CTL_NO_LINK); r8712_os_indicate_disconnect(padapter); } if (padapter->pwrctrlpriv.pwr_mode != padapter->registrypriv.power_mgnt) { del_timer(&pmlmepriv->dhcp_timer); r8712_set_ps_mode(padapter, padapter->registrypriv.power_mgnt, padapter->registrypriv.smart_ps); } } /*Notes: *pnetwork : returns from r8712_joinbss_event_callback *ptarget_wlan: found from scanned_queue *if join_res > 0, for (fw_state==WIFI_STATION_STATE), we check if * "ptarget_sta" & "ptarget_wlan" exist. *if join_res > 0, for (fw_state==WIFI_ADHOC_STATE), we only check * if "ptarget_wlan" exist. *if join_res > 0, update "cur_network->network" from * "pnetwork->network" if (ptarget_wlan !=NULL). */ void r8712_joinbss_event_callback(struct _adapter *adapter, u8 *pbuf) { unsigned long irqL = 0, irqL2; struct sta_info *ptarget_sta = NULL, *pcur_sta = NULL; struct sta_priv *pstapriv = &adapter->stapriv; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct wlan_network *cur_network = &pmlmepriv->cur_network; struct wlan_network *pcur_wlan = NULL, *ptarget_wlan = NULL; unsigned int the_same_macaddr = false; struct wlan_network *pnetwork; if (sizeof(struct list_head) == 4 * sizeof(u32)) { pnetwork = kmalloc(sizeof(struct wlan_network), GFP_ATOMIC); if (!pnetwork) return; memcpy((u8 *)pnetwork + 16, (u8 *)pbuf + 8, sizeof(struct wlan_network) - 16); } else { pnetwork = (struct wlan_network *)pbuf; } #ifdef __BIG_ENDIAN /* endian_convert */ pnetwork->join_res = le32_to_cpu(pnetwork->join_res); pnetwork->network_type = le32_to_cpu(pnetwork->network_type); pnetwork->network.Length = le32_to_cpu(pnetwork->network.Length); pnetwork->network.Ssid.SsidLength = le32_to_cpu(pnetwork->network.Ssid.SsidLength); pnetwork->network.Privacy = le32_to_cpu(pnetwork->network.Privacy); pnetwork->network.Rssi = le32_to_cpu(pnetwork->network.Rssi); pnetwork->network.NetworkTypeInUse = le32_to_cpu(pnetwork->network.NetworkTypeInUse); pnetwork->network.Configuration.ATIMWindow = le32_to_cpu(pnetwork->network.Configuration.ATIMWindow); pnetwork->network.Configuration.BeaconPeriod = le32_to_cpu(pnetwork->network.Configuration.BeaconPeriod); pnetwork->network.Configuration.DSConfig = le32_to_cpu(pnetwork->network.Configuration.DSConfig); pnetwork->network.Configuration.FHConfig.DwellTime = le32_to_cpu(pnetwork->network.Configuration.FHConfig.DwellTime); pnetwork->network.Configuration.FHConfig.HopPattern = le32_to_cpu(pnetwork->network.Configuration.FHConfig.HopPattern); pnetwork->network.Configuration.FHConfig.HopSet = le32_to_cpu(pnetwork->network.Configuration.FHConfig.HopSet); pnetwork->network.Configuration.FHConfig.Length = le32_to_cpu(pnetwork->network.Configuration.FHConfig.Length); pnetwork->network.Configuration.Length = le32_to_cpu(pnetwork->network.Configuration.Length); pnetwork->network.InfrastructureMode = le32_to_cpu(pnetwork->network.InfrastructureMode); pnetwork->network.IELength = le32_to_cpu(pnetwork->network.IELength); #endif the_same_macaddr = !memcmp(pnetwork->network.MacAddress, cur_network->network.MacAddress, ETH_ALEN); pnetwork->network.Length = r8712_get_wlan_bssid_ex_sz(&pnetwork->network); spin_lock_irqsave(&pmlmepriv->lock, irqL); if (pnetwork->network.Length > sizeof(struct wlan_bssid_ex)) goto ignore_joinbss_callback; if (pnetwork->join_res > 0) { if (check_fwstate(pmlmepriv, _FW_UNDER_LINKING)) { /*s1. find ptarget_wlan*/ if (check_fwstate(pmlmepriv, _FW_LINKED)) { if (the_same_macaddr) { ptarget_wlan = r8712_find_network(&pmlmepriv->scanned_queue, cur_network->network.MacAddress); } else { pcur_wlan = r8712_find_network(&pmlmepriv->scanned_queue, cur_network->network.MacAddress); if (pcur_wlan) pcur_wlan->fixed = false; pcur_sta = r8712_get_stainfo(pstapriv, cur_network->network.MacAddress); spin_lock_irqsave(&pstapriv->sta_hash_lock, irqL2); r8712_free_stainfo(adapter, pcur_sta); spin_unlock_irqrestore(&(pstapriv->sta_hash_lock), irqL2); ptarget_wlan = r8712_find_network(&pmlmepriv->scanned_queue, pnetwork->network.MacAddress); if (ptarget_wlan) ptarget_wlan->fixed = true; } } else { ptarget_wlan = r8712_find_network(&pmlmepriv->scanned_queue, pnetwork->network.MacAddress); if (ptarget_wlan) ptarget_wlan->fixed = true; } if (!ptarget_wlan) { if (check_fwstate(pmlmepriv, _FW_UNDER_LINKING)) pmlmepriv->fw_state ^= _FW_UNDER_LINKING; goto ignore_joinbss_callback; } /*s2. find ptarget_sta & update ptarget_sta*/ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) { if (the_same_macaddr) { ptarget_sta = r8712_get_stainfo(pstapriv, pnetwork->network.MacAddress); if (!ptarget_sta) ptarget_sta = r8712_alloc_stainfo(pstapriv, pnetwork->network.MacAddress); } else { ptarget_sta = r8712_alloc_stainfo(pstapriv, pnetwork->network.MacAddress); } if (ptarget_sta) /*update ptarget_sta*/ { ptarget_sta->aid = pnetwork->join_res; ptarget_sta->qos_option = 1; ptarget_sta->mac_id = 5; if (adapter->securitypriv.AuthAlgrthm == 2) { adapter->securitypriv.binstallGrpkey = false; adapter->securitypriv.busetkipkey = false; adapter->securitypriv.bgrpkey_handshake = false; ptarget_sta->ieee8021x_blocked = true; ptarget_sta->XPrivacy = adapter->securitypriv.PrivacyAlgrthm; memset((u8 *)&ptarget_sta->x_UncstKey, 0, sizeof(union Keytype)); memset((u8 *)&ptarget_sta->tkiprxmickey, 0, sizeof(union Keytype)); memset((u8 *)&ptarget_sta->tkiptxmickey, 0, sizeof(union Keytype)); memset((u8 *)&ptarget_sta->txpn, 0, sizeof(union pn48)); memset((u8 *)&ptarget_sta->rxpn, 0, sizeof(union pn48)); } } else { if (check_fwstate(pmlmepriv, _FW_UNDER_LINKING)) pmlmepriv->fw_state ^= _FW_UNDER_LINKING; goto ignore_joinbss_callback; } } /*s3. update cur_network & indicate connect*/ memcpy(&cur_network->network, &pnetwork->network, pnetwork->network.Length); cur_network->aid = pnetwork->join_res; /*update fw_state will clr _FW_UNDER_LINKING*/ switch (pnetwork->network.InfrastructureMode) { case Ndis802_11Infrastructure: pmlmepriv->fw_state = WIFI_STATION_STATE; break; case Ndis802_11IBSS: pmlmepriv->fw_state = WIFI_ADHOC_STATE; break; default: pmlmepriv->fw_state = WIFI_NULL_STATE; break; } r8712_update_protection(adapter, (cur_network->network.IEs) + sizeof(struct NDIS_802_11_FIXED_IEs), (cur_network->network.IELength)); /*TODO: update HT_Capability*/ update_ht_cap(adapter, cur_network->network.IEs, cur_network->network.IELength); /*indicate connect*/ if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) r8712_indicate_connect(adapter); del_timer(&pmlmepriv->assoc_timer); } else { goto ignore_joinbss_callback; } } else { if (check_fwstate(pmlmepriv, _FW_UNDER_LINKING)) { mod_timer(&pmlmepriv->assoc_timer, jiffies + msecs_to_jiffies(1)); _clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING); } } ignore_joinbss_callback: spin_unlock_irqrestore(&pmlmepriv->lock, irqL); if (sizeof(struct list_head) == 4 * sizeof(u32)) kfree(pnetwork); } void r8712_stassoc_event_callback(struct _adapter *adapter, u8 *pbuf) { unsigned long irqL; struct sta_info *psta; struct mlme_priv *pmlmepriv = &(adapter->mlmepriv); struct stassoc_event *pstassoc = (struct stassoc_event *)pbuf; /* to do: */ if (!r8712_access_ctrl(&adapter->acl_list, pstassoc->macaddr)) return; psta = r8712_get_stainfo(&adapter->stapriv, pstassoc->macaddr); if (psta) { /*the sta have been in sta_info_queue => do nothing *(between drv has received this event before and * fw have not yet to set key to CAM_ENTRY) */ return; } psta = r8712_alloc_stainfo(&adapter->stapriv, pstassoc->macaddr); if (!psta) return; /* to do : init sta_info variable */ psta->qos_option = 0; psta->mac_id = le32_to_cpu(pstassoc->cam_id); /* psta->aid = (uint)pstassoc->cam_id; */ if (adapter->securitypriv.AuthAlgrthm == 2) psta->XPrivacy = adapter->securitypriv.PrivacyAlgrthm; psta->ieee8021x_blocked = false; spin_lock_irqsave(&pmlmepriv->lock, irqL); if (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) || check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)) { if (adapter->stapriv.asoc_sta_count == 2) { /* a sta + bc/mc_stainfo (not Ibss_stainfo) */ r8712_indicate_connect(adapter); } } spin_unlock_irqrestore(&pmlmepriv->lock, irqL); } void r8712_stadel_event_callback(struct _adapter *adapter, u8 *pbuf) { unsigned long irqL, irqL2; struct sta_info *psta; struct wlan_network *pwlan = NULL; struct wlan_bssid_ex *pdev_network = NULL; u8 *pibss = NULL; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct stadel_event *pstadel = (struct stadel_event *)pbuf; struct sta_priv *pstapriv = &adapter->stapriv; struct wlan_network *tgt_network = &pmlmepriv->cur_network; spin_lock_irqsave(&pmlmepriv->lock, irqL2); if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)) { r8712_ind_disconnect(adapter); r8712_free_assoc_resources(adapter); } if (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE | WIFI_ADHOC_STATE)) { psta = r8712_get_stainfo(&adapter->stapriv, pstadel->macaddr); spin_lock_irqsave(&pstapriv->sta_hash_lock, irqL); r8712_free_stainfo(adapter, psta); spin_unlock_irqrestore(&pstapriv->sta_hash_lock, irqL); if (adapter->stapriv.asoc_sta_count == 1) { /*a sta + bc/mc_stainfo (not Ibss_stainfo) */ pwlan = r8712_find_network(&pmlmepriv->scanned_queue, tgt_network->network.MacAddress); if (pwlan) { pwlan->fixed = false; free_network_nolock(pmlmepriv, pwlan); } /*re-create ibss*/ pdev_network = &(adapter->registrypriv.dev_network); pibss = adapter->registrypriv.dev_network.MacAddress; memcpy(pdev_network, &tgt_network->network, r8712_get_wlan_bssid_ex_sz(&tgt_network->network)); memcpy(&pdev_network->Ssid, &pmlmepriv->assoc_ssid, sizeof(struct ndis_802_11_ssid)); r8712_update_registrypriv_dev_network(adapter); r8712_generate_random_ibss(pibss); if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE)) { _clr_fwstate_(pmlmepriv, WIFI_ADHOC_STATE); set_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE); } } } spin_unlock_irqrestore(&pmlmepriv->lock, irqL2); } void r8712_cpwm_event_callback(struct _adapter *adapter, u8 *pbuf) { struct reportpwrstate_parm *preportpwrstate = (struct reportpwrstate_parm *)pbuf; preportpwrstate->state |= (u8)(adapter->pwrctrlpriv.cpwm_tog + 0x80); r8712_cpwm_int_hdl(adapter, preportpwrstate); } /* When the Netgear 3500 AP is with WPA2PSK-AES mode, it will send * the ADDBA req frame with start seq control = 0 to wifi client after * the WPA handshake and the seqence number of following data packet * will be 0. In this case, the Rx reorder sequence is not longer than 0 * and the WiFi client will drop the data with seq number 0. * So, the 8712 firmware has to inform driver with receiving the * ADDBA-Req frame so that the driver can reset the * sequence value of Rx reorder control. */ void r8712_got_addbareq_event_callback(struct _adapter *adapter, u8 *pbuf) { struct ADDBA_Req_Report_parm *pAddbareq_pram = (struct ADDBA_Req_Report_parm *)pbuf; struct sta_info *psta; struct sta_priv *pstapriv = &adapter->stapriv; struct recv_reorder_ctrl *precvreorder_ctrl = NULL; psta = r8712_get_stainfo(pstapriv, pAddbareq_pram->MacAddress); if (psta) { precvreorder_ctrl = &psta->recvreorder_ctrl[pAddbareq_pram->tid]; /* set the indicate_seq to 0xffff so that the rx reorder * can store any following data packet. */ precvreorder_ctrl->indicate_seq = 0xffff; } } void r8712_wpspbc_event_callback(struct _adapter *adapter, u8 *pbuf) { if (!adapter->securitypriv.wps_hw_pbc_pressed) adapter->securitypriv.wps_hw_pbc_pressed = true; } void _r8712_sitesurvey_ctrl_handler(struct _adapter *adapter) { struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct sitesurvey_ctrl *psitesurveyctrl = &pmlmepriv->sitesurveyctrl; struct registry_priv *pregistrypriv = &adapter->registrypriv; u64 current_tx_pkts; uint current_rx_pkts; current_tx_pkts = (adapter->xmitpriv.tx_pkts) - (psitesurveyctrl->last_tx_pkts); current_rx_pkts = (adapter->recvpriv.rx_pkts) - (psitesurveyctrl->last_rx_pkts); psitesurveyctrl->last_tx_pkts = adapter->xmitpriv.tx_pkts; psitesurveyctrl->last_rx_pkts = adapter->recvpriv.rx_pkts; if ((current_tx_pkts > pregistrypriv->busy_thresh) || (current_rx_pkts > pregistrypriv->busy_thresh)) psitesurveyctrl->traffic_busy = true; else psitesurveyctrl->traffic_busy = false; } void _r8712_join_timeout_handler(struct _adapter *adapter) { unsigned long irqL; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; if (adapter->driver_stopped || adapter->surprise_removed) return; spin_lock_irqsave(&pmlmepriv->lock, irqL); _clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING); pmlmepriv->to_join = false; if (check_fwstate(pmlmepriv, _FW_LINKED)) { r8712_os_indicate_disconnect(adapter); _clr_fwstate_(pmlmepriv, _FW_LINKED); } if (adapter->pwrctrlpriv.pwr_mode != adapter->registrypriv.power_mgnt) { r8712_set_ps_mode(adapter, adapter->registrypriv.power_mgnt, adapter->registrypriv.smart_ps); } spin_unlock_irqrestore(&pmlmepriv->lock, irqL); } void r8712_scan_timeout_handler (struct _adapter *adapter) { unsigned long irqL; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; spin_lock_irqsave(&pmlmepriv->lock, irqL); _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY); pmlmepriv->to_join = false; /* scan fail, so clear to_join flag */ spin_unlock_irqrestore(&pmlmepriv->lock, irqL); } void _r8712_dhcp_timeout_handler (struct _adapter *adapter) { if (adapter->driver_stopped || adapter->surprise_removed) return; if (adapter->pwrctrlpriv.pwr_mode != adapter->registrypriv.power_mgnt) r8712_set_ps_mode(adapter, adapter->registrypriv.power_mgnt, adapter->registrypriv.smart_ps); } int r8712_select_and_join_from_scan(struct mlme_priv *pmlmepriv) { struct list_head *phead; unsigned char *dst_ssid, *src_ssid; struct _adapter *adapter; struct __queue *queue = NULL; struct wlan_network *pnetwork = NULL; struct wlan_network *pnetwork_max_rssi = NULL; adapter = (struct _adapter *)pmlmepriv->nic_hdl; queue = &pmlmepriv->scanned_queue; phead = &queue->queue; pmlmepriv->pscanned = phead->next; while (1) { if (end_of_queue_search(phead, pmlmepriv->pscanned)) { if (pmlmepriv->assoc_by_rssi && pnetwork_max_rssi) { pnetwork = pnetwork_max_rssi; goto ask_for_joinbss; } return -EINVAL; } pnetwork = container_of(pmlmepriv->pscanned, struct wlan_network, list); pmlmepriv->pscanned = pmlmepriv->pscanned->next; if (pmlmepriv->assoc_by_bssid) { dst_ssid = pnetwork->network.MacAddress; src_ssid = pmlmepriv->assoc_bssid; if (!memcmp(dst_ssid, src_ssid, ETH_ALEN)) { if (check_fwstate(pmlmepriv, _FW_LINKED)) { if (is_same_network(&pmlmepriv->cur_network.network, &pnetwork->network)) { _clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING); /*r8712_indicate_connect again*/ r8712_indicate_connect(adapter); return 2; } r8712_disassoc_cmd(adapter); r8712_ind_disconnect(adapter); r8712_free_assoc_resources(adapter); } goto ask_for_joinbss; } } else if (pmlmepriv->assoc_ssid.SsidLength == 0) { goto ask_for_joinbss; } dst_ssid = pnetwork->network.Ssid.Ssid; src_ssid = pmlmepriv->assoc_ssid.Ssid; if ((pnetwork->network.Ssid.SsidLength == pmlmepriv->assoc_ssid.SsidLength) && (!memcmp(dst_ssid, src_ssid, pmlmepriv->assoc_ssid.SsidLength))) { if (pmlmepriv->assoc_by_rssi) { /* if the ssid is the same, select the bss * which has the max rssi */ if (pnetwork_max_rssi) { if (pnetwork->network.Rssi > pnetwork_max_rssi->network.Rssi) pnetwork_max_rssi = pnetwork; } else { pnetwork_max_rssi = pnetwork; } } else if (is_desired_network(adapter, pnetwork)) { if (check_fwstate(pmlmepriv, _FW_LINKED)) { r8712_disassoc_cmd(adapter); r8712_free_assoc_resources(adapter); } goto ask_for_joinbss; } } } ask_for_joinbss: return r8712_joinbss_cmd(adapter, pnetwork); } int r8712_set_auth(struct _adapter *adapter, struct security_priv *psecuritypriv) { struct cmd_priv *pcmdpriv = &adapter->cmdpriv; struct cmd_obj *pcmd; struct setauth_parm *psetauthparm; pcmd = kmalloc(sizeof(*pcmd), GFP_ATOMIC); if (!pcmd) return -ENOMEM; psetauthparm = kzalloc(sizeof(*psetauthparm), GFP_ATOMIC); if (!psetauthparm) { kfree(pcmd); return -ENOMEM; } psetauthparm->mode = (u8)psecuritypriv->AuthAlgrthm; pcmd->cmdcode = _SetAuth_CMD_; pcmd->parmbuf = (unsigned char *)psetauthparm; pcmd->cmdsz = sizeof(struct setauth_parm); pcmd->rsp = NULL; pcmd->rspsz = 0; INIT_LIST_HEAD(&pcmd->list); r8712_enqueue_cmd(pcmdpriv, pcmd); return 0; } int r8712_set_key(struct _adapter *adapter, struct security_priv *psecuritypriv, sint keyid) { struct cmd_priv *pcmdpriv = &adapter->cmdpriv; struct cmd_obj *pcmd; struct setkey_parm *psetkeyparm; u8 keylen; int ret; pcmd = kmalloc(sizeof(*pcmd), GFP_ATOMIC); if (!pcmd) return -ENOMEM; psetkeyparm = kzalloc(sizeof(*psetkeyparm), GFP_ATOMIC); if (!psetkeyparm) { ret = -ENOMEM; goto err_free_cmd; } if (psecuritypriv->AuthAlgrthm == 2) { /* 802.1X */ psetkeyparm->algorithm = (u8)psecuritypriv->XGrpPrivacy; } else { /* WEP */ psetkeyparm->algorithm = (u8)psecuritypriv->PrivacyAlgrthm; } psetkeyparm->keyid = (u8)keyid; switch (psetkeyparm->algorithm) { case _WEP40_: keylen = 5; memcpy(psetkeyparm->key, psecuritypriv->DefKey[keyid].skey, keylen); break; case _WEP104_: keylen = 13; memcpy(psetkeyparm->key, psecuritypriv->DefKey[keyid].skey, keylen); break; case _TKIP_: if (keyid < 1 || keyid > 2) { ret = -EINVAL; goto err_free_parm; } keylen = 16; memcpy(psetkeyparm->key, &psecuritypriv->XGrpKey[keyid - 1], keylen); psetkeyparm->grpkey = 1; break; case _AES_: if (keyid < 1 || keyid > 2) { ret = -EINVAL; goto err_free_parm; } keylen = 16; memcpy(psetkeyparm->key, &psecuritypriv->XGrpKey[keyid - 1], keylen); psetkeyparm->grpkey = 1; break; default: ret = -EINVAL; goto err_free_parm; } pcmd->cmdcode = _SetKey_CMD_; pcmd->parmbuf = (u8 *)psetkeyparm; pcmd->cmdsz = (sizeof(struct setkey_parm)); pcmd->rsp = NULL; pcmd->rspsz = 0; INIT_LIST_HEAD(&pcmd->list); r8712_enqueue_cmd(pcmdpriv, pcmd); return 0; err_free_parm: kfree(psetkeyparm); err_free_cmd: kfree(pcmd); return ret; } /* adjust IEs for r8712_joinbss_cmd in WMM */ int r8712_restruct_wmm_ie(struct _adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_len, uint initial_out_len) { unsigned int ielength = 0; unsigned int i, j; i = 12; /* after the fixed IE */ while (i < in_len) { ielength = initial_out_len; if (in_ie[i] == 0xDD && in_ie[i + 2] == 0x00 && in_ie[i + 3] == 0x50 && in_ie[i + 4] == 0xF2 && in_ie[i + 5] == 0x02 && i + 5 < in_len) { /*WMM element ID and OUI*/ for (j = i; j < i + 9; j++) { out_ie[ielength] = in_ie[j]; ielength++; } out_ie[initial_out_len + 1] = 0x07; out_ie[initial_out_len + 6] = 0x00; out_ie[initial_out_len + 8] = 0x00; break; } i += (in_ie[i + 1] + 2); /* to the next IE element */ } return ielength; } /* * Ported from 8185: IsInPreAuthKeyList(). * * Search by BSSID, * Return Value: * -1 :if there is no pre-auth key in the table * >=0 :if there is pre-auth key, and return the entry id */ static int SecIsInPMKIDList(struct _adapter *Adapter, u8 *bssid) { struct security_priv *p = &Adapter->securitypriv; int i; for (i = 0; i < NUM_PMKID_CACHE; i++) if (p->PMKIDList[i].bUsed && !memcmp(p->PMKIDList[i].Bssid, bssid, ETH_ALEN)) return i; return -1; } sint r8712_restruct_sec_ie(struct _adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_len) { u8 authmode = 0, match; u8 sec_ie[IW_CUSTOM_MAX], uncst_oui[4], bkup_ie[255]; u8 wpa_oui[4] = {0x0, 0x50, 0xf2, 0x01}; uint ielength, cnt, remove_cnt; int iEntry; struct mlme_priv *pmlmepriv = &adapter->mlmepriv; struct security_priv *psecuritypriv = &adapter->securitypriv; uint ndisauthmode = psecuritypriv->ndisauthtype; uint ndissecuritytype = psecuritypriv->ndisencryptstatus; if ((ndisauthmode == Ndis802_11AuthModeWPA) || (ndisauthmode == Ndis802_11AuthModeWPAPSK)) { authmode = _WPA_IE_ID_; uncst_oui[0] = 0x0; uncst_oui[1] = 0x50; uncst_oui[2] = 0xf2; } if ((ndisauthmode == Ndis802_11AuthModeWPA2) || (ndisauthmode == Ndis802_11AuthModeWPA2PSK)) { authmode = _WPA2_IE_ID_; uncst_oui[0] = 0x0; uncst_oui[1] = 0x0f; uncst_oui[2] = 0xac; } switch (ndissecuritytype) { case Ndis802_11Encryption1Enabled: case Ndis802_11Encryption1KeyAbsent: uncst_oui[3] = 0x1; break; case Ndis802_11Encryption2Enabled: case Ndis802_11Encryption2KeyAbsent: uncst_oui[3] = 0x2; break; case Ndis802_11Encryption3Enabled: case Ndis802_11Encryption3KeyAbsent: uncst_oui[3] = 0x4; break; default: break; } /*Search required WPA or WPA2 IE and copy to sec_ie[] */ cnt = 12; match = false; while (cnt < in_len) { if (in_ie[cnt] == authmode) { if ((authmode == _WPA_IE_ID_) && (!memcmp(&in_ie[cnt + 2], &wpa_oui[0], 4))) { memcpy(&sec_ie[0], &in_ie[cnt], in_ie[cnt + 1] + 2); match = true; break; } if (authmode == _WPA2_IE_ID_) { memcpy(&sec_ie[0], &in_ie[cnt], in_ie[cnt + 1] + 2); match = true; break; } if (((authmode == _WPA_IE_ID_) && (!memcmp(&in_ie[cnt + 2], &wpa_oui[0], 4))) || (authmode == _WPA2_IE_ID_)) memcpy(&bkup_ie[0], &in_ie[cnt], in_ie[cnt + 1] + 2); } cnt += in_ie[cnt + 1] + 2; /*get next*/ } /*restruct WPA IE or WPA2 IE in sec_ie[] */ if (match) { if (sec_ie[0] == _WPA_IE_ID_) { /* parsing SSN IE to select required encryption * algorithm, and set the bc/mc encryption algorithm */ while (true) { /*check wpa_oui tag*/ if (memcmp(&sec_ie[2], &wpa_oui[0], 4)) { match = false; break; } if ((sec_ie[6] != 0x01) || (sec_ie[7] != 0x0)) { /*IE Ver error*/ match = false; break; } if (!memcmp(&sec_ie[8], &wpa_oui[0], 3)) { /* get bc/mc encryption type (group * key type) */ switch (sec_ie[11]) { case 0x0: /*none*/ psecuritypriv->XGrpPrivacy = _NO_PRIVACY_; break; case 0x1: /*WEP_40*/ psecuritypriv->XGrpPrivacy = _WEP40_; break; case 0x2: /*TKIP*/ psecuritypriv->XGrpPrivacy = _TKIP_; break; case 0x3: /*AESCCMP*/ case 0x4: psecuritypriv->XGrpPrivacy = _AES_; break; case 0x5: /*WEP_104*/ psecuritypriv->XGrpPrivacy = _WEP104_; break; } } else { match = false; break; } if (sec_ie[12] == 0x01) { /*check the unicast encryption type*/ if (memcmp(&sec_ie[14], &uncst_oui[0], 4)) { match = false; break; } /*else the uncst_oui is match*/ } else { /*mixed mode, unicast_enc_type > 1*/ /*select the uncst_oui and remove * the other uncst_oui */ cnt = sec_ie[12]; remove_cnt = (cnt - 1) * 4; sec_ie[12] = 0x01; memcpy(&sec_ie[14], &uncst_oui[0], 4); /*remove the other unicast suit*/ memcpy(&sec_ie[18], &sec_ie[18 + remove_cnt], sec_ie[1] - 18 + 2 - remove_cnt); sec_ie[1] = sec_ie[1] - remove_cnt; } break; } } if (authmode == _WPA2_IE_ID_) { /* parsing RSN IE to select required encryption * algorithm, and set the bc/mc encryption algorithm */ while (true) { if ((sec_ie[2] != 0x01) || (sec_ie[3] != 0x0)) { /*IE Ver error*/ match = false; break; } if (!memcmp(&sec_ie[4], &uncst_oui[0], 3)) { /*get bc/mc encryption type*/ switch (sec_ie[7]) { case 0x1: /*WEP_40*/ psecuritypriv->XGrpPrivacy = _WEP40_; break; case 0x2: /*TKIP*/ psecuritypriv->XGrpPrivacy = _TKIP_; break; case 0x4: /*AESWRAP*/ psecuritypriv->XGrpPrivacy = _AES_; break; case 0x5: /*WEP_104*/ psecuritypriv->XGrpPrivacy = _WEP104_; break; default: /*one*/ psecuritypriv->XGrpPrivacy = _NO_PRIVACY_; break; } } else { match = false; break; } if (sec_ie[8] == 0x01) { /*check the unicast encryption type*/ if (memcmp(&sec_ie[10], &uncst_oui[0], 4)) { match = false; break; } /*else the uncst_oui is match*/ } else { /*mixed mode, unicast_enc_type > 1*/ /*select the uncst_oui and remove the * other uncst_oui */ cnt = sec_ie[8]; remove_cnt = (cnt - 1) * 4; sec_ie[8] = 0x01; memcpy(&sec_ie[10], &uncst_oui[0], 4); /*remove the other unicast suit*/ memcpy(&sec_ie[14], &sec_ie[14 + remove_cnt], (sec_ie[1] - 14 + 2 - remove_cnt)); sec_ie[1] = sec_ie[1] - remove_cnt; } break; } } } if ((authmode == _WPA_IE_ID_) || (authmode == _WPA2_IE_ID_)) { /*copy fixed ie*/ memcpy(out_ie, in_ie, 12); ielength = 12; /*copy RSN or SSN*/ if (match) { memcpy(&out_ie[ielength], &sec_ie[0], sec_ie[1] + 2); ielength += sec_ie[1] + 2; if (authmode == _WPA2_IE_ID_) { /*the Pre-Authentication bit should be zero*/ out_ie[ielength - 1] = 0; out_ie[ielength - 2] = 0; } r8712_report_sec_ie(adapter, authmode, sec_ie); } } else { /*copy fixed ie only*/ memcpy(out_ie, in_ie, 12); ielength = 12; if (psecuritypriv->wps_phase) { memcpy(out_ie + ielength, psecuritypriv->wps_ie, psecuritypriv->wps_ie_len); ielength += psecuritypriv->wps_ie_len; } } iEntry = SecIsInPMKIDList(adapter, pmlmepriv->assoc_bssid); if (iEntry < 0) return ielength; if (authmode == _WPA2_IE_ID_) { out_ie[ielength] = 1; ielength++; out_ie[ielength] = 0; /*PMKID count = 0x0100*/ ielength++; memcpy(&out_ie[ielength], &psecuritypriv->PMKIDList[iEntry].PMKID, 16); ielength += 16; out_ie[13] += 18;/*PMKID length = 2+16*/ } return ielength; } void r8712_init_registrypriv_dev_network(struct _adapter *adapter) { struct registry_priv *pregistrypriv = &adapter->registrypriv; struct eeprom_priv *peepriv = &adapter->eeprompriv; struct wlan_bssid_ex *pdev_network = &pregistrypriv->dev_network; u8 *myhwaddr = myid(peepriv); memcpy(pdev_network->MacAddress, myhwaddr, ETH_ALEN); memcpy(&pdev_network->Ssid, &pregistrypriv->ssid, sizeof(struct ndis_802_11_ssid)); pdev_network->Configuration.Length = sizeof(struct NDIS_802_11_CONFIGURATION); pdev_network->Configuration.BeaconPeriod = 100; pdev_network->Configuration.FHConfig.Length = 0; pdev_network->Configuration.FHConfig.HopPattern = 0; pdev_network->Configuration.FHConfig.HopSet = 0; pdev_network->Configuration.FHConfig.DwellTime = 0; } void r8712_update_registrypriv_dev_network(struct _adapter *adapter) { int sz = 0; struct registry_priv *pregistrypriv = &adapter->registrypriv; struct wlan_bssid_ex *pdev_network = &pregistrypriv->dev_network; struct security_priv *psecuritypriv = &adapter->securitypriv; struct wlan_network *cur_network = &adapter->mlmepriv.cur_network; pdev_network->Privacy = cpu_to_le32(psecuritypriv->PrivacyAlgrthm > 0 ? 1 : 0); /* adhoc no 802.1x */ pdev_network->Rssi = 0; switch (pregistrypriv->wireless_mode) { case WIRELESS_11B: pdev_network->NetworkTypeInUse = Ndis802_11DS; break; case WIRELESS_11G: case WIRELESS_11BG: pdev_network->NetworkTypeInUse = Ndis802_11OFDM24; break; case WIRELESS_11A: pdev_network->NetworkTypeInUse = Ndis802_11OFDM5; break; default: /* TODO */ break; } pdev_network->Configuration.DSConfig = pregistrypriv->channel; if (cur_network->network.InfrastructureMode == Ndis802_11IBSS) pdev_network->Configuration.ATIMWindow = 3; pdev_network->InfrastructureMode = cur_network->network.InfrastructureMode; /* 1. Supported rates * 2. IE */ sz = r8712_generate_ie(pregistrypriv); pdev_network->IELength = sz; pdev_network->Length = r8712_get_wlan_bssid_ex_sz(pdev_network); } /*the function is at passive_level*/ void r8712_joinbss_reset(struct _adapter *padapter) { int i; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct ht_priv *phtpriv = &pmlmepriv->htpriv; /* todo: if you want to do something io/reg/hw setting before join_bss, * please add code here */ phtpriv->ampdu_enable = false;/*reset to disabled*/ for (i = 0; i < 16; i++) phtpriv->baddbareq_issued[i] = false;/*reset it*/ if (phtpriv->ht_option) { /* validate usb rx aggregation */ r8712_write8(padapter, 0x102500D9, 48);/*TH = 48 pages, 6k*/ } else { /* invalidate usb rx aggregation */ /* TH=1 => means that invalidate usb rx aggregation */ r8712_write8(padapter, 0x102500D9, 1); } } /*the function is >= passive_level*/ unsigned int r8712_restructure_ht_ie(struct _adapter *padapter, u8 *in_ie, u8 *out_ie, uint in_len, uint *pout_len) { u32 ielen, out_len; unsigned char *p; struct ieee80211_ht_cap ht_capie; unsigned char WMM_IE[] = {0x00, 0x50, 0xf2, 0x02, 0x00, 0x01, 0x00}; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct qos_priv *pqospriv = &pmlmepriv->qospriv; struct ht_priv *phtpriv = &pmlmepriv->htpriv; phtpriv->ht_option = 0; p = r8712_get_ie(in_ie + 12, WLAN_EID_HT_CAPABILITY, &ielen, in_len - 12); if (p && (ielen > 0)) { if (pqospriv->qos_option == 0) { out_len = *pout_len; r8712_set_ie(out_ie + out_len, WLAN_EID_VENDOR_SPECIFIC, _WMM_IE_Length_, WMM_IE, pout_len); pqospriv->qos_option = 1; } out_len = *pout_len; memset(&ht_capie, 0, sizeof(struct ieee80211_ht_cap)); ht_capie.cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_TX_STBC | IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_DSSSCCK40); ht_capie.ampdu_params_info = (IEEE80211_HT_AMPDU_PARM_FACTOR & 0x03) | (IEEE80211_HT_AMPDU_PARM_DENSITY & 0x00); r8712_set_ie(out_ie + out_len, WLAN_EID_HT_CAPABILITY, sizeof(struct ieee80211_ht_cap), (unsigned char *)&ht_capie, pout_len); phtpriv->ht_option = 1; } return phtpriv->ht_option; } /* the function is > passive_level (in critical_section) */ static void update_ht_cap(struct _adapter *padapter, u8 *pie, uint ie_len) { u8 *p, max_ampdu_sz; int i; uint len; struct sta_info *bmc_sta, *psta; struct ieee80211_ht_cap *pht_capie; struct recv_reorder_ctrl *preorder_ctrl; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct ht_priv *phtpriv = &pmlmepriv->htpriv; struct registry_priv *pregistrypriv = &padapter->registrypriv; struct wlan_network *pcur_network = &(pmlmepriv->cur_network); if (!phtpriv->ht_option) return; /* maybe needs check if ap supports rx ampdu. */ if (!phtpriv->ampdu_enable && (pregistrypriv->ampdu_enable == 1)) phtpriv->ampdu_enable = true; /*check Max Rx A-MPDU Size*/ len = 0; p = r8712_get_ie(pie + sizeof(struct NDIS_802_11_FIXED_IEs), WLAN_EID_HT_CAPABILITY, &len, ie_len - sizeof(struct NDIS_802_11_FIXED_IEs)); if (p && len > 0) { pht_capie = (struct ieee80211_ht_cap *)(p + 2); max_ampdu_sz = (pht_capie->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR); /* max_ampdu_sz (kbytes); */ max_ampdu_sz = 1 << (max_ampdu_sz + 3); phtpriv->rx_ampdu_maxlen = max_ampdu_sz; } /* for A-MPDU Rx reordering buffer control for bmc_sta & sta_info * if A-MPDU Rx is enabled, resetting rx_ordering_ctrl * wstart_b(indicate_seq) to default value=0xffff * todo: check if AP can send A-MPDU packets */ bmc_sta = r8712_get_bcmc_stainfo(padapter); if (bmc_sta) { for (i = 0; i < 16; i++) { preorder_ctrl = &bmc_sta->recvreorder_ctrl[i]; preorder_ctrl->indicate_seq = 0xffff; preorder_ctrl->wend_b = 0xffff; } } psta = r8712_get_stainfo(&padapter->stapriv, pcur_network->network.MacAddress); if (psta) { for (i = 0; i < 16; i++) { preorder_ctrl = &psta->recvreorder_ctrl[i]; preorder_ctrl->indicate_seq = 0xffff; preorder_ctrl->wend_b = 0xffff; } } len = 0; p = r8712_get_ie(pie + sizeof(struct NDIS_802_11_FIXED_IEs), WLAN_EID_HT_OPERATION, &len, ie_len - sizeof(struct NDIS_802_11_FIXED_IEs)); } void r8712_issue_addbareq_cmd(struct _adapter *padapter, int priority) { struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct ht_priv *phtpriv = &pmlmepriv->htpriv; if ((phtpriv->ht_option == 1) && (phtpriv->ampdu_enable)) { if (!phtpriv->baddbareq_issued[priority]) { r8712_addbareq_cmd(padapter, (u8)priority); phtpriv->baddbareq_issued[priority] = true; } } } |
1 3 3 3 1 2 5 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | // SPDX-License-Identifier: GPL-2.0-only /* * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c. * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 2000,2001 Andi Kleen, SuSE Labs */ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/file.h> #include <linux/icmpv6.h> #include <linux/socket.h> #include <linux/syscalls.h> #include <linux/filter.h> #include <linux/compat.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/export.h> #include <net/scm.h> #include <net/sock.h> #include <net/ip.h> #include <net/ipv6.h> #include <linux/uaccess.h> #include <net/compat.h> int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg, struct sockaddr __user **save_addr) { ssize_t err; kmsg->msg_flags = msg->msg_flags; kmsg->msg_namelen = msg->msg_namelen; if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) return -EINVAL; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control_is_user = true; kmsg->msg_get_inq = 0; kmsg->msg_control_user = compat_ptr(msg->msg_control); kmsg->msg_controllen = msg->msg_controllen; if (save_addr) *save_addr = compat_ptr(msg->msg_name); if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { err = move_addr_to_kernel(compat_ptr(msg->msg_name), kmsg->msg_namelen, kmsg->msg_name); if (err < 0) return err; } } else { kmsg->msg_name = NULL; kmsg->msg_namelen = 0; } if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; kmsg->msg_ubuf = NULL; return 0; } int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, struct sockaddr __user **save_addr, struct iovec **iov) { struct compat_msghdr msg; ssize_t err; if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; err = __get_compat_msghdr(kmsg, &msg, save_addr); if (err) return err; err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); return err < 0 ? err : 0; } /* Bleech... */ #define CMSG_COMPAT_ALIGN(len) ALIGN((len), sizeof(s32)) #define CMSG_COMPAT_DATA(cmsg) \ ((void __user *)((char __user *)(cmsg) + sizeof(struct compat_cmsghdr))) #define CMSG_COMPAT_SPACE(len) \ (sizeof(struct compat_cmsghdr) + CMSG_COMPAT_ALIGN(len)) #define CMSG_COMPAT_LEN(len) \ (sizeof(struct compat_cmsghdr) + (len)) #define CMSG_COMPAT_FIRSTHDR(msg) \ (((msg)->msg_controllen) >= sizeof(struct compat_cmsghdr) ? \ (struct compat_cmsghdr __user *)((msg)->msg_control_user) : \ (struct compat_cmsghdr __user *)NULL) #define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \ ((ucmlen) >= sizeof(struct compat_cmsghdr) && \ (ucmlen) <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char __user *)(ucmsg) - (char __user *)(mhdr)->msg_control_user))) static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg, struct compat_cmsghdr __user *cmsg, int cmsg_len) { char __user *ptr = (char __user *)cmsg + CMSG_COMPAT_ALIGN(cmsg_len); if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control_user) > msg->msg_controllen) return NULL; return (struct compat_cmsghdr __user *)ptr; } /* There is a lot of hair here because the alignment rules (and * thus placement) of cmsg headers and length are different for * 32-bit apps. -DaveM */ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, unsigned char *stackbuf, int stackbuf_size) { struct compat_cmsghdr __user *ucmsg; struct cmsghdr *kcmsg, *kcmsg_base; compat_size_t ucmlen; __kernel_size_t kcmlen, tmp; int err = -EFAULT; BUILD_BUG_ON(sizeof(struct compat_cmsghdr) != CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))); kcmlen = 0; kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { if (get_user(ucmlen, &ucmsg->cmsg_len)) return -EFAULT; /* Catch bogons. */ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) return -EINVAL; tmp = ((ucmlen - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); tmp = CMSG_ALIGN(tmp); kcmlen += tmp; ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } if (kcmlen == 0) return -EINVAL; /* The kcmlen holds the 64-bit version of the control length. * It may not be modified as we do not stick it into the kmsg * until we have successfully copied over all of the data * from the user. */ if (kcmlen > stackbuf_size) kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); if (kcmsg == NULL) return -ENOMEM; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { struct compat_cmsghdr cmsg; if (copy_from_user(&cmsg, ucmsg, sizeof(cmsg))) goto Efault; if (!CMSG_COMPAT_OK(cmsg.cmsg_len, ucmsg, kmsg)) goto Einval; tmp = ((cmsg.cmsg_len - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) goto Einval; kcmsg->cmsg_len = tmp; kcmsg->cmsg_level = cmsg.cmsg_level; kcmsg->cmsg_type = cmsg.cmsg_type; tmp = CMSG_ALIGN(tmp); if (copy_from_user(CMSG_DATA(kcmsg), CMSG_COMPAT_DATA(ucmsg), (cmsg.cmsg_len - sizeof(*ucmsg)))) goto Efault; /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* * check the length of messages copied in is the same as the * what we get from the first loop */ if ((char *)kcmsg - (char *)kcmsg_base != kcmlen) goto Einval; /* Ok, looks like we made it. Hook it up and return success. */ kmsg->msg_control_is_user = false; kmsg->msg_control = kcmsg_base; kmsg->msg_controllen = kcmlen; return 0; Einval: err = -EINVAL; Efault: if (kcmsg_base != (struct cmsghdr *)stackbuf) sock_kfree_s(sk, kcmsg_base, kcmlen); return err; } int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control_user; struct compat_cmsghdr cmhdr; struct old_timeval32 ctv; struct old_timespec32 cts[3]; int cmlen; if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { kmsg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (!COMPAT_USE_64BIT_TIME) { if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) { struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } if (level == SOL_SOCKET && (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) { int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3; int i; struct __kernel_old_timespec *ts = data; for (i = 0; i < count; i++) { cts[i].tv_sec = ts[i].tv_sec; cts[i].tv_nsec = ts[i].tv_nsec; } data = &cts; len = sizeof(cts[0]) * count; } } cmlen = CMSG_COMPAT_LEN(len); if (kmsg->msg_controllen < cmlen) { kmsg->msg_flags |= MSG_CTRUNC; cmlen = kmsg->msg_controllen; } cmhdr.cmsg_level = level; cmhdr.cmsg_type = type; cmhdr.cmsg_len = cmlen; if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) return -EFAULT; if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr))) return -EFAULT; cmlen = CMSG_COMPAT_SPACE(len); if (kmsg->msg_controllen < cmlen) cmlen = kmsg->msg_controllen; kmsg->msg_control_user += cmlen; kmsg->msg_controllen -= cmlen; return 0; } static int scm_max_fds_compat(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct compat_cmsghdr)) return 0; return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); } void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *)msg->msg_control_user; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count); int __user *cmsg_data = CMSG_COMPAT_DATA(cm); int err = 0, i; for (i = 0; i < fdmax; i++) { err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control_user += cmlen; msg->msg_controllen -= cmlen; } } if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their usage counts * incremented, so we just free the list. */ __scm_destroy(scm); } /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4) }; #undef AL static inline long __compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_sendmsg(fd, msg, flags); } static inline long __compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags) { return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { return __compat_sys_sendmmsg(fd, mmsg, vlen, flags); } static inline long __compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_recvmsg(fd, msg, flags); } static inline long __compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned int flags, struct sockaddr __user *addr, int __user *addrlen) { return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { return __compat_sys_recvfrom(fd, buf, len, flags, NULL, NULL); } COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int __user *, addrlen) { return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen); } COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct __kernel_timespec __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, timeout, NULL); } #ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct old_timespec32 __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL, timeout); } #endif COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { u32 a[AUDITSC_ARGS]; unsigned int len; u32 a0, a1; int ret; if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; len = nas[call]; if (len > sizeof(a)) return -EINVAL; if (copy_from_user(a, args, len)) return -EFAULT; ret = audit_socketcall_compat(len / sizeof(a[0]), a); if (ret) return ret; a0 = a[0]; a1 = a[1]; switch (call) { case SYS_SOCKET: ret = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: ret = __sys_bind(a0, compat_ptr(a1), a[2]); break; case SYS_CONNECT: ret = __sys_connect(a0, compat_ptr(a1), a[2]); break; case SYS_LISTEN: ret = __sys_listen(a0, a1); break; case SYS_ACCEPT: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_GETPEERNAME: ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_SOCKETPAIR: ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); break; case SYS_SEND: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], NULL, 0); break; case SYS_SENDTO: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); break; case SYS_RECV: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], NULL, NULL); break; case SYS_RECVFROM: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = __sys_setsockopt(a0, a1, a[2], compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: ret = __sys_getsockopt(a0, a1, a[2], compat_ptr(a[3]), compat_ptr(a[4])); break; case SYS_SENDMSG: ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; case SYS_SENDMMSG: ret = __compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVMSG: ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; case SYS_RECVMMSG: ret = __sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3] | MSG_CMSG_COMPAT, NULL, compat_ptr(a[4])); break; case SYS_ACCEPT4: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; break; } return ret; } |
233 1294 1294 538 538 233 233 233 991 991 982 982 991 991 670 670 709 707 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 | // SPDX-License-Identifier: GPL-2.0+ /* * User-space Probes (UProbes) * * Copyright (C) IBM Corporation, 2008-2012 * Authors: * Srikar Dronamraju * Jim Keniston * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include <linux/kernel.h> #include <linux/highmem.h> #include <linux/pagemap.h> /* read_mapping_page */ #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ #include <linux/mmu_notifier.h> /* set_pte_at_notify */ #include <linux/swap.h> /* folio_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ #include <linux/percpu-rwsem.h> #include <linux/task_work.h> #include <linux/shmem_fs.h> #include <linux/khugepaged.h> #include <linux/uprobes.h> #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE static struct rb_root uprobes_tree = RB_ROOT; /* * allows us to skip the uprobe_mmap if there are no uprobe events active * at this time. Probably a fine grained per inode count is better? */ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; loff_t ref_ctr_offset; unsigned long flags; /* * The generic code assumes that it has two members of unknown type * owned by the arch-specific code: * * insn - copy_insn() saves the original instruction here for * arch_uprobe_analyze_insn(). * * ixol - potentially modified instruction to execute out of * line, copied to xol_area by xol_get_insn_slot(). */ struct arch_uprobe arch; }; struct delayed_uprobe { struct list_head list; struct uprobe *uprobe; struct mm_struct *mm; }; static DEFINE_MUTEX(delayed_uprobe_lock); static LIST_HEAD(delayed_uprobe_list); /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction * mangled by set_swbp(). * * On a breakpoint hit, thread contests for a slot. It frees the * slot after singlestep. Currently a fixed number of slots are * allocated. */ struct xol_area { wait_queue_head_t wq; /* if all slots are busy */ atomic_t slot_count; /* number of in-use slots */ unsigned long *bitmap; /* 0 = free slot */ struct vm_special_mapping xol_mapping; struct page *pages[2]; /* * We keep the vma's vm_start rather than a pointer to the vma * itself. The probed process or a naughty kernel module could make * the vma go away, and we must handle that reasonably gracefully. */ unsigned long vaddr; /* Page(s) of instruction slots */ }; /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have * changed after breakpoint was inserted. * - is_register: indicates if we are in register context. * - Return 1 if the specified virtual address is in an * executable vma. */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE; return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC; } static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); } static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) { return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** * __replace_page - replace page in vma by new page. * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page * @addr: address the old @page is mapped at * @old_page: the page we are replacing by new_page * @new_page: the modified page we replace page by * * If @new_page is NULL, only unmap @old_page. * * Returns 0 on success, negative error code otherwise. */ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct page *old_page, struct page *new_page) { struct folio *old_folio = page_folio(old_page); struct folio *new_folio; struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0); int err; struct mmu_notifier_range range; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr, addr + PAGE_SIZE); if (new_page) { new_folio = page_folio(new_page); err = mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL); if (err) return err; } /* For folio_free_swap() below */ folio_lock(old_folio); mmu_notifier_invalidate_range_start(&range); err = -EAGAIN; if (!page_vma_mapped_walk(&pvmw)) goto unlock; VM_BUG_ON_PAGE(addr != pvmw.address, old_page); if (new_page) { folio_get(new_folio); folio_add_new_anon_rmap(new_folio, vma, addr); folio_add_lru_vma(new_folio, vma); } else /* no new page, just dec_mm_counter for old_page */ dec_mm_counter(mm, MM_ANONPAGES); if (!folio_test_anon(old_folio)) { dec_mm_counter(mm, mm_counter_file(old_page)); inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte))); ptep_clear_flush(vma, addr, pvmw.pte); if (new_page) set_pte_at_notify(mm, addr, pvmw.pte, mk_pte(new_page, vma->vm_page_prot)); folio_remove_rmap_pte(old_folio, old_page, vma); if (!folio_mapped(old_folio)) folio_free_swap(old_folio); page_vma_mapped_walk_done(&pvmw); folio_put(old_folio); err = 0; unlock: mmu_notifier_invalidate_range_end(&range); folio_unlock(old_folio); return err; } /** * is_swbp_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_swbp_insn * Returns true if @insn is a breakpoint instruction. */ bool __weak is_swbp_insn(uprobe_opcode_t *insn) { return *insn == UPROBE_SWBP_INSN; } /** * is_trap_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_trap_insn * Returns true if @insn is a breakpoint instruction. * * This function is needed for the case where an architecture has multiple * trap instructions (like powerpc). */ bool __weak is_trap_insn(uprobe_opcode_t *insn) { return is_swbp_insn(insn); } static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) { void *kaddr = kmap_atomic(page); memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); kunmap_atomic(kaddr); } static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len) { void *kaddr = kmap_atomic(page); memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len); kunmap_atomic(kaddr); } static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) { uprobe_opcode_t old_opcode; bool is_swbp; /* * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. * We do not check if it is any other 'trap variant' which could * be conditional trap instruction such as the one powerpc supports. * * The logic is that we do not care if the underlying instruction * is a trap variant; uprobes always wins over any other (gdb) * breakpoint. */ copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); if (is_swbp_insn(new_opcode)) { if (is_swbp) /* register: already installed? */ return 0; } else { if (!is_swbp) /* unregister: was it changed by us? */ return 0; } return 1; } static struct delayed_uprobe * delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; list_for_each_entry(du, &delayed_uprobe_list, list) if (du->uprobe == uprobe && du->mm == mm) return du; return NULL; } static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; if (delayed_uprobe_check(uprobe, mm)) return 0; du = kzalloc(sizeof(*du), GFP_KERNEL); if (!du) return -ENOMEM; du->uprobe = uprobe; du->mm = mm; list_add(&du->list, &delayed_uprobe_list); return 0; } static void delayed_uprobe_delete(struct delayed_uprobe *du) { if (WARN_ON(!du)) return; list_del(&du->list); kfree(du); } static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) { struct list_head *pos, *q; struct delayed_uprobe *du; if (!uprobe && !mm) return; list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (uprobe && du->uprobe != uprobe) continue; if (mm && du->mm != mm) continue; delayed_uprobe_delete(du); } } static bool valid_ref_ctr_vma(struct uprobe *uprobe, struct vm_area_struct *vma) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); return uprobe->ref_ctr_offset && vma->vm_file && file_inode(vma->vm_file) == uprobe->inode && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && vma->vm_start <= vaddr && vma->vm_end > vaddr; } static struct vm_area_struct * find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *tmp; for_each_vma(vmi, tmp) if (valid_ref_ctr_vma(uprobe, tmp)) return tmp; return NULL; } static int __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) { void *kaddr; struct page *page; int ret; short *ptr; if (!vaddr || !d) return -EINVAL; ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, NULL); if (unlikely(ret <= 0)) { /* * We are asking for 1 page. If get_user_pages_remote() fails, * it may return 0, in that case we have to return error. */ return ret == 0 ? -EBUSY : ret; } kaddr = kmap_atomic(page); ptr = kaddr + (vaddr & ~PAGE_MASK); if (unlikely(*ptr + d < 0)) { pr_warn("ref_ctr going negative. vaddr: 0x%lx, " "curr val: %d, delta: %d\n", vaddr, *ptr, d); ret = -EINVAL; goto out; } *ptr += d; ret = 0; out: kunmap_atomic(kaddr); put_page(page); return ret; } static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); } static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, short d) { struct vm_area_struct *rc_vma; unsigned long rc_vaddr; int ret = 0; rc_vma = find_ref_ctr_vma(uprobe, mm); if (rc_vma) { rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); ret = __update_ref_ctr(mm, rc_vaddr, d); if (ret) update_ref_ctr_warn(uprobe, mm, d); if (d > 0) return ret; } mutex_lock(&delayed_uprobe_lock); if (d > 0) ret = delayed_uprobe_add(uprobe, mm); else delayed_uprobe_remove(uprobe, mm); mutex_unlock(&delayed_uprobe_lock); return ret; } /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. * * uprobe_write_opcode - write the opcode at a given virtual address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. * * Called with mm->mmap_lock held for write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; int ret, is_register, ref_ctr_updated = 0; bool orig_page_huge = false; unsigned int gup_flags = FOLL_FORCE; is_register = is_swbp_insn(&opcode); uprobe = container_of(auprobe, struct uprobe, arch); retry: if (is_register) gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ old_page = get_user_page_vma_remote(mm, vaddr, gup_flags, &vma); if (IS_ERR(old_page)) return PTR_ERR(old_page); ret = verify_opcode(old_page, vaddr, &opcode); if (ret <= 0) goto put_old; if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; goto put_old; } /* We are going to replace instruction, update ref_ctr. */ if (!ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); if (ret) goto put_old; ref_ctr_updated = 1; } ret = 0; if (!is_register && !PageAnon(old_page)) goto put_old; ret = anon_vma_prepare(vma); if (ret) goto put_old; ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) goto put_old; __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); if (!is_register) { struct page *orig_page; pgoff_t index; VM_BUG_ON_PAGE(!PageAnon(old_page), old_page); index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT; orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index); if (orig_page) { if (PageUptodate(orig_page) && pages_identical(new_page, orig_page)) { /* let go new_page */ put_page(new_page); new_page = NULL; if (PageCompound(orig_page)) orig_page_huge = true; } put_page(orig_page); } } ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) goto retry; /* Revert back reference counter if instruction update failed. */ if (ret && is_register && ref_ctr_updated) update_ref_ctr(uprobe, mm, -1); /* try collapse pmd for compound page */ if (!ret && orig_page_huge) collapse_pte_mapped_thp(mm, vaddr, false); return ret; } /** * set_swbp - store breakpoint at a given address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, store the breakpoint instruction at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); } /** * set_orig_insn - Restore the original instruction. * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); } static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { /* * If application munmap(exec_vma) before uprobe_unregister() * gets called, we don't get a chance to remove uprobe from * delayed_uprobe_list from remove_breakpoint(). Do it here. */ mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); kfree(uprobe); } } static __always_inline int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, const struct uprobe *r) { if (l_inode < r->inode) return -1; if (l_inode > r->inode) return 1; if (l_offset < r->offset) return -1; if (l_offset > r->offset) return 1; return 0; } #define __node_2_uprobe(node) \ rb_entry((node), struct uprobe, rb_node) struct __uprobe_key { struct inode *inode; loff_t offset; }; static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) { const struct __uprobe_key *a = key; return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); } static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) { struct uprobe *u = __node_2_uprobe(a); return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); } static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) { struct __uprobe_key key = { .inode = inode, .offset = offset, }; struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); if (node) return get_uprobe(__node_2_uprobe(node)); return NULL; } /* * Find a uprobe corresponding to a given inode:offset * Acquires uprobes_treelock */ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { struct uprobe *uprobe; spin_lock(&uprobes_treelock); uprobe = __find_uprobe(inode, offset); spin_unlock(&uprobes_treelock); return uprobe; } static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) return get_uprobe(__node_2_uprobe(node)); /* get access + creation ref */ refcount_set(&uprobe->ref, 2); return NULL; } /* * Acquire uprobes_treelock. * Matching uprobe already exists in rbtree; * increment (access refcount) and return the matching uprobe. * * No matching uprobe; insert the uprobe in rb_tree; * get a double refcount (access + creation) and return NULL. */ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { struct uprobe *u; spin_lock(&uprobes_treelock); u = __insert_uprobe(uprobe); spin_unlock(&uprobes_treelock); return u; } static void ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) { pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx " "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) cur_uprobe->ref_ctr_offset, (unsigned long long) uprobe->ref_ctr_offset); } static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); if (!uprobe) return NULL; uprobe->inode = inode; uprobe->offset = offset; uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe) { if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { ref_ctr_mismatch_warn(cur_uprobe, uprobe); put_uprobe(cur_uprobe); kfree(uprobe); return ERR_PTR(-EINVAL); } kfree(uprobe); uprobe = cur_uprobe; } return uprobe; } static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); uc->next = uprobe->consumers; uprobe->consumers = uc; up_write(&uprobe->consumer_rwsem); } /* * For uprobe @uprobe, delete the consumer @uc. * Return true if the @uc is deleted successfully * or return false. */ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) { struct uprobe_consumer **con; bool ret = false; down_write(&uprobe->consumer_rwsem); for (con = &uprobe->consumers; *con; con = &(*con)->next) { if (*con == uc) { *con = uc->next; ret = true; break; } } up_write(&uprobe->consumer_rwsem); return ret; } static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; /* * Ensure that the page that has the original instruction is populated * and in page-cache. If ->read_folio == NULL it must be shmem_mapping(), * see uprobe_register(). */ if (mapping->a_ops->read_folio) page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp); else page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); copy_from_page(page, offset, insn, nbytes); put_page(page); return 0; } static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; void *insn = &uprobe->arch.insn; int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ do { if (offs >= i_size_read(uprobe->inode)) break; len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); err = __copy_insn(mapping, filp, insn, len, offs); if (err) break; insn += len; offs += len; size -= len; } while (size); return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, struct mm_struct *mm, unsigned long vaddr) { int ret = 0; if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; /* TODO: move this into _register, until then we abuse this sem. */ down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; ret = copy_insn(uprobe, file); if (ret) goto out; ret = -ENOTSUPP; if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); if (ret) goto out; smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: up_write(&uprobe->consumer_rwsem); return ret; } static inline bool consumer_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { return !uc->filter || uc->filter(uc, ctx, mm); } static bool filter_chain(struct uprobe *uprobe, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { struct uprobe_consumer *uc; bool ret = false; down_read(&uprobe->consumer_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { ret = consumer_filter(uc, ctx, mm); if (ret) break; } up_read(&uprobe->consumer_rwsem); return ret; } static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { bool first_uprobe; int ret; ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; /* * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), * the task can hit this breakpoint right after __replace_page(). */ first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); if (first_uprobe) set_bit(MMF_HAS_UPROBES, &mm->flags); ret = set_swbp(&uprobe->arch, mm, vaddr); if (!ret) clear_bit(MMF_RECALC_UPROBES, &mm->flags); else if (first_uprobe) clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } static inline bool uprobe_is_active(struct uprobe *uprobe) { return !RB_EMPTY_NODE(&uprobe->rb_node); } /* * There could be threads that have already hit the breakpoint. They * will recheck the current insn and restart if find_uprobe() fails. * See find_active_uprobe(). */ static void delete_uprobe(struct uprobe *uprobe) { if (WARN_ON(!uprobe_is_active(uprobe))) return; spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); spin_unlock(&uprobes_treelock); RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ put_uprobe(uprobe); } struct map_info { struct map_info *next; struct mm_struct *mm; unsigned long vaddr; }; static inline struct map_info *free_map_info(struct map_info *info) { struct map_info *next = info->next; kfree(info); return next; } static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; struct map_info *info; int more = 0; again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; if (!prev && !more) { /* * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through * reclaim. This is optimistic, no harm done if it fails. */ prev = kmalloc(sizeof(struct map_info), GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); if (prev) prev->next = NULL; } if (!prev) { more++; continue; } if (!mmget_not_zero(vma->vm_mm)) continue; info = prev; prev = prev->next; info->next = curr; curr = info; info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } i_mmap_unlock_read(mapping); if (!more) goto out; prev = curr; while (curr) { mmput(curr->mm); curr = curr->next; } do { info = kmalloc(sizeof(struct map_info), GFP_KERNEL); if (!info) { curr = ERR_PTR(-ENOMEM); goto out; } info->next = prev; prev = info; } while (--more); goto again; out: while (prev) prev = free_map_info(prev); return curr; } static int register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) { bool is_register = !!new; struct map_info *info; int err = 0; percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); if (IS_ERR(info)) { err = PTR_ERR(info); goto out; } while (info) { struct mm_struct *mm = info->mm; struct vm_area_struct *vma; if (err && is_register) goto free; mmap_write_lock(mm); vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) goto unlock; if (vma->vm_start > info->vaddr || vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { /* consult only the "caller", new consumer. */ if (consumer_filter(new, UPROBE_FILTER_REGISTER, mm)) err = install_breakpoint(uprobe, mm, vma, info->vaddr); } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { if (!filter_chain(uprobe, UPROBE_FILTER_UNREGISTER, mm)) err |= remove_breakpoint(uprobe, mm, info->vaddr); } unlock: mmap_write_unlock(mm); free: mmput(mm); info = free_map_info(info); } out: percpu_up_write(&dup_mmap_sem); return err; } static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { int err; if (WARN_ON(!consumer_del(uprobe, uc))) return; err = register_for_each_vma(uprobe, NULL); /* TODO : cant unregister? schedule a worker thread */ if (!uprobe->consumers && !err) delete_uprobe(uprobe); } /* * uprobe_unregister - unregister an already registered probe. * @inode: the file in which the probe has to be removed. * @offset: offset from the start of the file. * @uc: identify which probe if multiple probes are colocated. */ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return; down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister); /* * __uprobe_register - register a probe * @inode: the file in which the probe has to be placed. * @offset: offset from the start of the file. * @uc: information on howto handle the probe.. * * Apart from the access refcount, __uprobe_register() takes a creation * refcount (thro alloc_uprobe) if and only if this @uprobe is getting * inserted into the rbtree (i.e first consumer for a @inode:@offset * tuple). Creation refcount stops uprobe_unregister from freeing the * @uprobe even before the register operation is complete. Creation * refcount is released when the last @uc for the @uprobe * unregisters. Caller of __uprobe_register() is required to keep @inode * (and the containing mount) referenced. * * Return errno if it cannot successully install probes * else return 0 (success) */ static int __uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; /* Uprobe must have at least one set consumer */ if (!uc->handler && !uc->ret_handler) return -EINVAL; /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ if (!inode->i_mapping->a_ops->read_folio && !shmem_mapping(inode->i_mapping)) return -EIO; /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; /* * This ensures that copy_from_page(), copy_to_page() and * __update_ref_ctr() can't cross page boundary. */ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE)) return -EINVAL; if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) return -EINVAL; retry: uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; if (IS_ERR(uprobe)) return PTR_ERR(uprobe); /* * We can race with uprobe_unregister()->delete_uprobe(). * Check uprobe_is_active() and retry if it is false. */ down_write(&uprobe->register_rwsem); ret = -EAGAIN; if (likely(uprobe_is_active(uprobe))) { consumer_add(uprobe, uc); ret = register_for_each_vma(uprobe, uc); if (ret) __uprobe_unregister(uprobe, uc); } up_write(&uprobe->register_rwsem); put_uprobe(uprobe); if (unlikely(ret == -EAGAIN)) goto retry; return ret; } int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return __uprobe_register(inode, offset, 0, uc); } EXPORT_SYMBOL_GPL(uprobe_register); int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { return __uprobe_register(inode, offset, ref_ctr_offset, uc); } EXPORT_SYMBOL_GPL(uprobe_register_refctr); /* * uprobe_apply - unregister an already registered probe. * @inode: the file in which the probe has to be removed. * @offset: offset from the start of the file. * @uc: consumer which wants to add more or remove some breakpoints * @add: add or remove the breakpoints */ int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) { struct uprobe *uprobe; struct uprobe_consumer *con; int ret = -ENOENT; uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return ret; down_write(&uprobe->register_rwsem); for (con = uprobe->consumers; con && con != uc ; con = con->next) ; if (con) ret = register_for_each_vma(uprobe, add ? uc : NULL); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); return ret; } static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; int err = 0; mmap_read_lock(mm); for_each_vma(vmi, vma) { unsigned long vaddr; loff_t offset; if (!valid_vma(vma, false) || file_inode(vma->vm_file) != uprobe->inode) continue; offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; if (uprobe->offset < offset || uprobe->offset >= offset + vma->vm_end - vma->vm_start) continue; vaddr = offset_to_vaddr(vma, uprobe->offset); err |= remove_breakpoint(uprobe, mm, vaddr); } mmap_read_unlock(mm); return err; } static struct rb_node * find_node_in_range(struct inode *inode, loff_t min, loff_t max) { struct rb_node *n = uprobes_tree.rb_node; while (n) { struct uprobe *u = rb_entry(n, struct uprobe, rb_node); if (inode < u->inode) { n = n->rb_left; } else if (inode > u->inode) { n = n->rb_right; } else { if (max < u->offset) n = n->rb_left; else if (min > u->offset) n = n->rb_right; else break; } } return n; } /* * For a given range in vma, build a list of probes that need to be inserted. */ static void build_probe_list(struct inode *inode, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *head) { loff_t min, max; struct rb_node *n, *t; struct uprobe *u; INIT_LIST_HEAD(head); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset < min) break; list_add(&u->pending_list, head); get_uprobe(u); } for (t = n; (t = rb_next(t)); ) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset > max) break; list_add(&u->pending_list, head); get_uprobe(u); } } spin_unlock(&uprobes_treelock); } /* @vma contains reference counter, not the probed instruction. */ static int delayed_ref_ctr_inc(struct vm_area_struct *vma) { struct list_head *pos, *q; struct delayed_uprobe *du; unsigned long vaddr; int ret = 0, err = 0; mutex_lock(&delayed_uprobe_lock); list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (du->mm != vma->vm_mm || !valid_ref_ctr_vma(du->uprobe, vma)) continue; vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); if (ret) { update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); if (!err) err = ret; } delayed_uprobe_delete(du); } mutex_unlock(&delayed_uprobe_lock); return err; } /* * Called from mmap_region/vma_merge with mm->mmap_lock acquired. * * Currently we ignore all errors and always return 0, the callers * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; if (no_uprobe_events()) return 0; if (vma->vm_file && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) delayed_ref_ctr_inc(vma); if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); if (!inode) return 0; mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); /* * We can race with uprobe_unregister(), this uprobe can be already * removed. But in this case filter_chain() must return false, all * consumers have gone away. */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!fatal_signal_pending(current) && filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); return 0; } static bool vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) { loff_t min, max; struct inode *inode; struct rb_node *n; inode = file_inode(vma->vm_file); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); spin_unlock(&uprobes_treelock); return !!n; } /* * Called in context of a munmap of a vma. */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) || test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags)) return; if (vma_has_uprobes(vma, start, end)) set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { struct vm_area_struct *vma; int ret; if (mmap_write_lock_killable(mm)) return -EINTR; if (mm->uprobes_state.xol_area) { ret = -EALREADY; goto fail; } if (!area->vaddr) { /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(area->vaddr)) { ret = area->vaddr; goto fail; } } vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->xol_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; } ret = 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: mmap_write_unlock(mm); return ret; } static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; area->bitmap = kcalloc(BITS_TO_LONGS(UINSNS_PER_PAGE), sizeof(long), GFP_KERNEL); if (!area->bitmap) goto free_area; area->xol_mapping.name = "[uprobes]"; area->xol_mapping.fault = NULL; area->xol_mapping.pages = area->pages; area->pages[0] = alloc_page(GFP_HIGHUSER); if (!area->pages[0]) goto free_bitmap; area->pages[1] = NULL; area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); atomic_set(&area->slot_count, 1); arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); if (!xol_add_vma(mm, area)) return area; __free_page(area->pages[0]); free_bitmap: kfree(area->bitmap); free_area: kfree(area); out: return NULL; } /* * get_xol_area - Allocate process's xol_area if necessary. * This area will be used for storing instructions for execution out of line. * * Returns the allocated area or NULL. */ static struct xol_area *get_xol_area(void) { struct mm_struct *mm = current->mm; struct xol_area *area; if (!mm->uprobes_state.xol_area) __create_xol_area(0); /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } /* * uprobe_clear_state - Free the area allocated for slots. */ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(NULL, mm); mutex_unlock(&delayed_uprobe_lock); if (!area) return; put_page(area->pages[0]); kfree(area->bitmap); kfree(area); } void uprobe_start_dup_mmap(void) { percpu_down_read(&dup_mmap_sem); } void uprobe_end_dup_mmap(void) { percpu_up_read(&dup_mmap_sem); } void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { set_bit(MMF_HAS_UPROBES, &newmm->flags); /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ set_bit(MMF_RECALC_UPROBES, &newmm->flags); } } /* * - search for a free slot. */ static unsigned long xol_take_insn_slot(struct xol_area *area) { unsigned long slot_addr; int slot_nr; do { slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); if (slot_nr < UINSNS_PER_PAGE) { if (!test_and_set_bit(slot_nr, area->bitmap)) break; slot_nr = UINSNS_PER_PAGE; continue; } wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE)); } while (slot_nr >= UINSNS_PER_PAGE); slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES); atomic_inc(&area->slot_count); return slot_addr; } /* * xol_get_insn_slot - allocate a slot for xol. * Returns the allocated slot address or 0. */ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) { struct xol_area *area; unsigned long xol_vaddr; area = get_xol_area(); if (!area) return 0; xol_vaddr = xol_take_insn_slot(area); if (unlikely(!xol_vaddr)) return 0; arch_uprobe_copy_ixol(area->pages[0], xol_vaddr, &uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); return xol_vaddr; } /* * xol_free_insn_slot - If slot was earlier allocated by * @xol_get_insn_slot(), make the slot available for * subsequent requests. */ static void xol_free_insn_slot(struct task_struct *tsk) { struct xol_area *area; unsigned long vma_end; unsigned long slot_addr; if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask) return; slot_addr = tsk->utask->xol_vaddr; if (unlikely(!slot_addr)) return; area = tsk->mm->uprobes_state.xol_area; vma_end = area->vaddr + PAGE_SIZE; if (area->vaddr <= slot_addr && slot_addr < vma_end) { unsigned long offset; int slot_nr; offset = slot_addr - area->vaddr; slot_nr = offset / UPROBE_XOL_SLOT_BYTES; if (slot_nr >= UINSNS_PER_PAGE) return; clear_bit(slot_nr, area->bitmap); atomic_dec(&area->slot_count); smp_mb__after_atomic(); /* pairs with prepare_to_wait() */ if (waitqueue_active(&area->wq)) wake_up(&area->wq); tsk->utask->xol_vaddr = 0; } } void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { /* Initialize the slot */ copy_to_page(page, vaddr, src, len); /* * We probably need flush_icache_user_page() but it needs vma. * This should work on most of architectures by default. If * architecture needs to do something different it can define * its own version of the function. */ flush_dcache_page(page); } /** * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs * @regs: Reflects the saved state of the task after it has hit a breakpoint * instruction. * Return the address of the breakpoint instruction. */ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } unsigned long uprobe_get_trap_addr(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (unlikely(utask && utask->active_uprobe)) return utask->vaddr; return instruction_pointer(regs); } static struct return_instance *free_ret_instance(struct return_instance *ri) { struct return_instance *next = ri->next; put_uprobe(ri->uprobe); kfree(ri); return next; } /* * Called with no locks held. * Called in context of an exiting or an exec-ing thread. */ void uprobe_free_utask(struct task_struct *t) { struct uprobe_task *utask = t->utask; struct return_instance *ri; if (!utask) return; if (utask->active_uprobe) put_uprobe(utask->active_uprobe); ri = utask->return_instances; while (ri) ri = free_ret_instance(ri); xol_free_insn_slot(t); kfree(utask); t->utask = NULL; } /* * Allocate a uprobe_task object for the task if necessary. * Called when the thread hits a breakpoint. * * Returns: * - pointer to new uprobe_task on success * - NULL otherwise */ static struct uprobe_task *get_utask(void) { if (!current->utask) current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); return current->utask; } static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) { struct uprobe_task *n_utask; struct return_instance **p, *o, *n; n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); if (!n_utask) return -ENOMEM; t->utask = n_utask; p = &n_utask->return_instances; for (o = o_utask->return_instances; o; o = o->next) { n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); if (!n) return -ENOMEM; *n = *o; get_uprobe(n->uprobe); n->next = NULL; *p = n; p = &n->next; n_utask->depth++; } return 0; } static void uprobe_warn(struct task_struct *t, const char *msg) { pr_warn("uprobe: %s:%d failed to %s\n", current->comm, current->pid, msg); } static void dup_xol_work(struct callback_head *work) { if (current->flags & PF_EXITING) return; if (!__create_xol_area(current->utask->dup_xol_addr) && !fatal_signal_pending(current)) uprobe_warn(current, "dup xol area"); } /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; struct xol_area *area; t->utask = NULL; if (!utask || !utask->return_instances) return; if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); /* The task can fork() after dup_xol_work() fails */ area = mm->uprobes_state.xol_area; if (!area) return uprobe_warn(t, "dup xol area"); if (mm == t->mm) return; t->utask->dup_xol_addr = area->vaddr; init_task_work(&t->utask->dup_xol_work, dup_xol_work); task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME); } /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. * * Returns -1 in case the xol_area is not allocated. */ static unsigned long get_trampoline_vaddr(void) { struct xol_area *area; unsigned long trampoline_vaddr = -1; /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; return trampoline_vaddr; } static void cleanup_return_instances(struct uprobe_task *utask, bool chained, struct pt_regs *regs) { struct return_instance *ri = utask->return_instances; enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri = free_ret_instance(ri); utask->depth--; } utask->return_instances = ri; } static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) { struct return_instance *ri; struct uprobe_task *utask; unsigned long orig_ret_vaddr, trampoline_vaddr; bool chained; if (!get_xol_area()) return; utask = get_utask(); if (!utask) return; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); return; } ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL); if (!ri) return; trampoline_vaddr = get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) goto fail; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); cleanup_return_instances(utask, chained, regs); /* * We don't want to keep trampoline address in stack, rather keep the * original return address of first caller thru all the consequent * instances. This also makes breakpoint unwrapping easier. */ if (chained) { if (!utask->return_instances) { /* * This situation is not possible. Likely we have an * attack from user-space. */ uprobe_warn(current, "handle tail call"); goto fail; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } ri->uprobe = get_uprobe(uprobe); ri->func = instruction_pointer(regs); ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; utask->depth++; ri->next = utask->return_instances; utask->return_instances = ri; return; fail: kfree(ri); } /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe_task *utask; unsigned long xol_vaddr; int err; utask = get_utask(); if (!utask) return -ENOMEM; xol_vaddr = xol_get_insn_slot(uprobe); if (!xol_vaddr) return -ENOMEM; utask->xol_vaddr = xol_vaddr; utask->vaddr = bp_vaddr; err = arch_uprobe_pre_xol(&uprobe->arch, regs); if (unlikely(err)) { xol_free_insn_slot(current); return err; } utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return 0; } /* * If we are singlestepping, then ensure this thread is not connected to * non-fatal signals until completion of singlestep. When xol insn itself * triggers the signal, restart the original insn even if the task is * already SIGKILL'ed (since coredump should report the correct ip). This * is even more important if the task has a handler for SIGSEGV/etc, The * _same_ instruction should be repeated again after return from the signal * handler, and SSTEP can never finish in this case. */ bool uprobe_deny_signal(void) { struct task_struct *t = current; struct uprobe_task *utask = t->utask; if (likely(!utask || !utask->active_uprobe)) return false; WARN_ON_ONCE(utask->state != UTASK_SSTEP); if (task_sigpending(t)) { spin_lock_irq(&t->sighand->siglock); clear_tsk_thread_flag(t, TIF_SIGPENDING); spin_unlock_irq(&t->sighand->siglock); if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); } } return true; } static void mmf_recalc_uprobes(struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; for_each_vma(vmi, vma) { if (!valid_vma(vma, false)) continue; /* * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; } clear_bit(MMF_HAS_UPROBES, &mm->flags); } static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; int result; if (WARN_ON_ONCE(!IS_ALIGNED(vaddr, UPROBE_SWBP_INSN_SIZE))) return -EINVAL; pagefault_disable(); result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr); pagefault_enable(); if (likely(result == 0)) goto out; /* * The NULL 'tsk' here ensures that any faults that occur here * will not be accounted to the task. 'mm' *is* current->mm, * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. */ result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; mmap_read_lock(mm); vma = vma_lookup(mm, bp_vaddr); if (vma) { if (valid_vma(vma, false)) { struct inode *inode = file_inode(vma->vm_file); loff_t offset = vaddr_to_offset(vma, bp_vaddr); uprobe = find_uprobe(inode, offset); } if (!uprobe) *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) mmf_recalc_uprobes(mm); mmap_read_unlock(mm); return uprobe; } static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; bool need_prep = false; /* prepare return uprobe, when needed */ down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { int rc = 0; if (uc->handler) { rc = uc->handler(uc, regs); WARN(rc & ~UPROBE_HANDLER_MASK, "bad rc=0x%x from %ps()\n", rc, uc->handler); } if (uc->ret_handler) need_prep = true; remove &= rc; } if (need_prep && !remove) prepare_uretprobe(uprobe, regs); /* put bp at return */ if (remove && uprobe->consumers) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } up_read(&uprobe->register_rwsem); } static void handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) { struct uprobe *uprobe = ri->uprobe; struct uprobe_consumer *uc; down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { if (uc->ret_handler) uc->ret_handler(uc, ri->func, regs); } up_read(&uprobe->register_rwsem); } static struct return_instance *find_next_ret_chain(struct return_instance *ri) { bool chained; do { chained = ri->chained; ri = ri->next; /* can't be NULL if chained */ } while (chained); return ri; } static void handle_trampoline(struct pt_regs *regs) { struct uprobe_task *utask; struct return_instance *ri, *next; bool valid; utask = current->utask; if (!utask) goto sigill; ri = utask->return_instances; if (!ri) goto sigill; do { /* * We should throw out the frames invalidated by longjmp(). * If this chain is valid, then the next one should be alive * or NULL; the latter case means that nobody but ri->func * could hit this trampoline on return. TODO: sigaltstack(). */ next = find_next_ret_chain(ri); valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs); instruction_pointer_set(regs, ri->orig_ret_vaddr); do { if (valid) handle_uretprobe_chain(ri, regs); ri = free_ret_instance(ri); utask->depth--; } while (ri != next); } while (!valid); utask->return_instances = ri; return; sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); } bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) { return false; } bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs) { return true; } /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. */ static void handle_swbp(struct pt_regs *regs) { struct uprobe *uprobe; unsigned long bp_vaddr; int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); if (bp_vaddr == get_trampoline_vaddr()) return handle_trampoline(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ force_sig(SIGTRAP); } else { /* * Either we raced with uprobe_unregister() or we can't * access this memory. The latter is only possible if * another thread plays with our ->mm. In both cases * we can simply restart. If this vma was unmapped we * can pretend this insn was not executed yet and get * the (correct) SIGSEGV after restart. */ instruction_pointer_set(regs, bp_vaddr); } return; } /* change it in advance for ->handler() and restart */ instruction_pointer_set(regs, bp_vaddr); /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; /* * Pairs with the smp_wmb() in prepare_uprobe(). * * Guarantees that if we see the UPROBE_COPY_INSN bit set, then * we must also see the stores to &uprobe->arch performed by the * prepare_uprobe() call. */ smp_rmb(); /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; if (arch_uprobe_ignore(&uprobe->arch, regs)) goto out; handler_chain(uprobe, regs); if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) return; /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ out: put_uprobe(uprobe); } /* * Perform required fix-ups and disable singlestep. * Allow pending signals to take effect. */ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else WARN_ON_ONCE(1); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; xol_free_insn_slot(current); spin_lock_irq(¤t->sighand->siglock); recalc_sigpending(); /* see uprobe_deny_signal() */ spin_unlock_irq(¤t->sighand->siglock); if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); force_sig(SIGILL); } } /* * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and * allows the thread to return from interrupt. After that handle_swbp() * sets utask->active_uprobe. * * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag * and allows the thread to return from interrupt. * * While returning to userspace, thread notices the TIF_UPROBE flag and calls * uprobe_notify_resume(). */ void uprobe_notify_resume(struct pt_regs *regs) { struct uprobe_task *utask; clear_thread_flag(TIF_UPROBE); utask = current->utask; if (utask && utask->active_uprobe) handle_singlestep(utask, regs); else handle_swbp(regs); } /* * uprobe_pre_sstep_notifier gets called from interrupt context as part of * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit. */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { if (!current->mm) return 0; if (!test_bit(MMF_HAS_UPROBES, ¤t->mm->flags) && (!current->utask || !current->utask->return_instances)) return 0; set_thread_flag(TIF_UPROBE); return 1; } /* * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep. */ int uprobe_post_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (!current->mm || !utask || !utask->active_uprobe) /* task is currently not uprobed */ return 0; utask->state = UTASK_SSTEP_ACK; set_thread_flag(TIF_UPROBE); return 1; } static struct notifier_block uprobe_exception_nb = { .notifier_call = arch_uprobe_exception_notify, .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); BUG_ON(register_die_notifier(&uprobe_exception_nb)); } |
1 8 4 4 1 2 2 2 2 2 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 8 8 8 8 7 8 1 8 8 8 9 9 9 8 8 8 8 8 8 7 9 9 10 10 10 10 1 1 1 1 1 1 1 1 1 9 6 2 9 9 9 9 8 9 8 1 1 8 9 9 1 9 9 9 9 4 4 5 6 7 2 8 8 8 8 8 7 8 7 7 7 2 5 1 1 1 1 1 1 1 1 8 8 3 3 3 1 1 1 1 2 1 1 3 1 2 1 1 1 2 2 2 1 2 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 | // SPDX-License-Identifier: GPL-2.0-only /* * vivid-vid-cap.c - video capture support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/videodev2.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-common.h> #include <media/v4l2-event.h> #include <media/v4l2-dv-timings.h> #include <media/v4l2-rect.h> #include "vivid-core.h" #include "vivid-vid-common.h" #include "vivid-kthread-cap.h" #include "vivid-vid-cap.h" /* Sizes must be in increasing order */ static const struct v4l2_frmsize_discrete webcam_sizes[] = { { 320, 180 }, { 640, 360 }, { 640, 480 }, { 1280, 720 }, { 1920, 1080 }, { 3840, 2160 }, }; /* * Intervals must be in increasing order and there must be twice as many * elements in this array as there are in webcam_sizes. */ static const struct v4l2_fract webcam_intervals[] = { { 1, 1 }, { 1, 2 }, { 1, 4 }, { 1, 5 }, { 1, 10 }, { 2, 25 }, { 1, 15 }, /* 7 - maximum for 2160p */ { 1, 25 }, { 1, 30 }, /* 9 - maximum for 1080p */ { 1, 40 }, { 1, 50 }, { 1, 60 }, /* 12 - maximum for 720p */ { 1, 120 }, }; /* Limit maximum FPS rates for high resolutions */ #define IVAL_COUNT_720P 12 /* 720p and up is limited to 60 fps */ #define IVAL_COUNT_1080P 9 /* 1080p and up is limited to 30 fps */ #define IVAL_COUNT_2160P 7 /* 2160p and up is limited to 15 fps */ static inline unsigned int webcam_ival_count(const struct vivid_dev *dev, unsigned int frmsize_idx) { if (webcam_sizes[frmsize_idx].height >= 2160) return IVAL_COUNT_2160P; if (webcam_sizes[frmsize_idx].height >= 1080) return IVAL_COUNT_1080P; if (webcam_sizes[frmsize_idx].height >= 720) return IVAL_COUNT_720P; /* For low resolutions, allow all FPS rates */ return ARRAY_SIZE(webcam_intervals); } static int vid_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); unsigned buffers = tpg_g_buffers(&dev->tpg); unsigned h = dev->fmt_cap_rect.height; unsigned p; if (dev->field_cap == V4L2_FIELD_ALTERNATE) { /* * You cannot use read() with FIELD_ALTERNATE since the field * information (TOP/BOTTOM) cannot be passed back to the user. */ if (vb2_fileio_is_active(vq)) return -EINVAL; } if (dev->queue_setup_error) { /* * Error injection: test what happens if queue_setup() returns * an error. */ dev->queue_setup_error = false; return -EINVAL; } if (*nplanes) { /* * Check if the number of requested planes match * the number of buffers in the current format. You can't mix that. */ if (*nplanes != buffers) return -EINVAL; for (p = 0; p < buffers; p++) { if (sizes[p] < tpg_g_line_width(&dev->tpg, p) * h + dev->fmt_cap->data_offset[p]) return -EINVAL; } } else { for (p = 0; p < buffers; p++) sizes[p] = (tpg_g_line_width(&dev->tpg, p) * h) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; } *nplanes = buffers; dprintk(dev, 1, "%s: count=%d\n", __func__, *nbuffers); for (p = 0; p < buffers; p++) dprintk(dev, 1, "%s: size[%u]=%u\n", __func__, p, sizes[p]); return 0; } static int vid_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); unsigned long size; unsigned buffers = tpg_g_buffers(&dev->tpg); unsigned p; dprintk(dev, 1, "%s\n", __func__); if (WARN_ON(NULL == dev->fmt_cap)) return -EINVAL; if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. */ dev->buf_prepare_error = false; return -EINVAL; } for (p = 0; p < buffers; p++) { size = (tpg_g_line_width(&dev->tpg, p) * dev->fmt_cap_rect.height) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; if (vb2_plane_size(vb, p) < size) { dprintk(dev, 1, "%s data will not fit into plane %u (%lu < %lu)\n", __func__, p, vb2_plane_size(vb, p), size); return -EINVAL; } vb2_set_plane_payload(vb, p, size); vb->planes[p].data_offset = dev->fmt_cap->data_offset[p]; } return 0; } static void vid_cap_buf_finish(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct v4l2_timecode *tc = &vbuf->timecode; unsigned fps = 25; unsigned seq = vbuf->sequence; if (!vivid_is_sdtv_cap(dev)) return; /* * Set the timecode. Rarely used, so it is interesting to * test this. */ vbuf->flags |= V4L2_BUF_FLAG_TIMECODE; if (dev->std_cap[dev->input] & V4L2_STD_525_60) fps = 30; tc->type = (fps == 30) ? V4L2_TC_TYPE_30FPS : V4L2_TC_TYPE_25FPS; tc->flags = 0; tc->frames = seq % fps; tc->seconds = (seq / fps) % 60; tc->minutes = (seq / (60 * fps)) % 60; tc->hours = (seq / (60 * 60 * fps)) % 24; } static void vid_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vid_cap_active); spin_unlock(&dev->slock); } static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); unsigned i; int err; if (vb2_is_streaming(&dev->vb_vid_out_q)) dev->can_loop_video = vivid_vid_can_loop(dev); dev->vid_cap_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); for (i = 0; i < VIDEO_MAX_FRAME; i++) dev->must_blank[i] = tpg_g_perc_fill(&dev->tpg) < 100; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_cap(dev, &dev->vid_cap_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vid_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vid_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_cap(dev, &dev->vid_cap_streaming); dev->can_loop_video = false; } static void vid_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vid_cap); } const struct vb2_ops vivid_vid_cap_qops = { .queue_setup = vid_cap_queue_setup, .buf_prepare = vid_cap_buf_prepare, .buf_finish = vid_cap_buf_finish, .buf_queue = vid_cap_buf_queue, .start_streaming = vid_cap_start_streaming, .stop_streaming = vid_cap_stop_streaming, .buf_request_complete = vid_cap_buf_request_complete, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; /* * Determine the 'picture' quality based on the current TV frequency: either * COLOR for a good 'signal', GRAY (grayscale picture) for a slightly off * signal or NOISE for no signal. */ void vivid_update_quality(struct vivid_dev *dev) { unsigned freq_modulus; if (dev->loop_video && (vivid_is_svid_cap(dev) || vivid_is_hdmi_cap(dev))) { /* * The 'noise' will only be replaced by the actual video * if the output video matches the input video settings. */ tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (vivid_is_hdmi_cap(dev) && VIVID_INVALID_SIGNAL(dev->dv_timings_signal_mode[dev->input])) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (vivid_is_sdtv_cap(dev) && VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (!vivid_is_tv_cap(dev)) { tpg_s_quality(&dev->tpg, TPG_QUAL_COLOR, 0); return; } /* * There is a fake channel every 6 MHz at 49.25, 55.25, etc. * From +/- 0.25 MHz around the channel there is color, and from * +/- 1 MHz there is grayscale (chroma is lost). * Everywhere else it is just noise. */ freq_modulus = (dev->tv_freq - 676 /* (43.25-1) * 16 */) % (6 * 16); if (freq_modulus > 2 * 16) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, next_pseudo_random32(dev->tv_freq ^ 0x55) & 0x3f); return; } if (freq_modulus < 12 /*0.75 * 16*/ || freq_modulus > 20 /*1.25 * 16*/) tpg_s_quality(&dev->tpg, TPG_QUAL_GRAY, 0); else tpg_s_quality(&dev->tpg, TPG_QUAL_COLOR, 0); } /* * Get the current picture quality and the associated afc value. */ static enum tpg_quality vivid_get_quality(struct vivid_dev *dev, s32 *afc) { unsigned freq_modulus; if (afc) *afc = 0; if (tpg_g_quality(&dev->tpg) == TPG_QUAL_COLOR || tpg_g_quality(&dev->tpg) == TPG_QUAL_NOISE) return tpg_g_quality(&dev->tpg); /* * There is a fake channel every 6 MHz at 49.25, 55.25, etc. * From +/- 0.25 MHz around the channel there is color, and from * +/- 1 MHz there is grayscale (chroma is lost). * Everywhere else it is just gray. */ freq_modulus = (dev->tv_freq - 676 /* (43.25-1) * 16 */) % (6 * 16); if (afc) *afc = freq_modulus - 1 * 16; return TPG_QUAL_GRAY; } enum tpg_video_aspect vivid_get_video_aspect(const struct vivid_dev *dev) { if (vivid_is_sdtv_cap(dev)) return dev->std_aspect_ratio[dev->input]; if (vivid_is_hdmi_cap(dev)) return dev->dv_timings_aspect_ratio[dev->input]; return TPG_VIDEO_ASPECT_IMAGE; } static enum tpg_pixel_aspect vivid_get_pixel_aspect(const struct vivid_dev *dev) { if (vivid_is_sdtv_cap(dev)) return (dev->std_cap[dev->input] & V4L2_STD_525_60) ? TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; if (vivid_is_hdmi_cap(dev) && dev->src_rect.width == 720 && dev->src_rect.height <= 576) return dev->src_rect.height == 480 ? TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; return TPG_PIXEL_ASPECT_SQUARE; } /* * Called whenever the format has to be reset which can occur when * changing inputs, standard, timings, etc. */ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) { struct v4l2_bt_timings *bt = &dev->dv_timings_cap[dev->input].bt; u32 dims[V4L2_CTRL_MAX_DIMS] = {}; unsigned size; u64 pixelclock; switch (dev->input_type[dev->input]) { case WEBCAM: default: dev->src_rect.width = webcam_sizes[dev->webcam_size_idx].width; dev->src_rect.height = webcam_sizes[dev->webcam_size_idx].height; dev->timeperframe_vid_cap = webcam_intervals[dev->webcam_ival_idx]; dev->field_cap = V4L2_FIELD_NONE; tpg_s_rgb_range(&dev->tpg, V4L2_DV_RGB_RANGE_AUTO); break; case TV: case SVID: dev->field_cap = dev->tv_field_cap; dev->src_rect.width = 720; if (dev->std_cap[dev->input] & V4L2_STD_525_60) { dev->src_rect.height = 480; dev->timeperframe_vid_cap = (struct v4l2_fract) { 1001, 30000 }; dev->service_set_cap = V4L2_SLICED_CAPTION_525; } else { dev->src_rect.height = 576; dev->timeperframe_vid_cap = (struct v4l2_fract) { 1000, 25000 }; dev->service_set_cap = V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; } tpg_s_rgb_range(&dev->tpg, V4L2_DV_RGB_RANGE_AUTO); break; case HDMI: dev->src_rect.width = bt->width; dev->src_rect.height = bt->height; size = V4L2_DV_BT_FRAME_WIDTH(bt) * V4L2_DV_BT_FRAME_HEIGHT(bt); if (dev->reduced_fps && can_reduce_fps(bt)) { pixelclock = div_u64(bt->pixelclock * 1000, 1001); bt->flags |= V4L2_DV_FL_REDUCED_FPS; } else { pixelclock = bt->pixelclock; bt->flags &= ~V4L2_DV_FL_REDUCED_FPS; } dev->timeperframe_vid_cap = (struct v4l2_fract) { size / 100, (u32)pixelclock / 100 }; if (bt->interlaced) dev->field_cap = V4L2_FIELD_ALTERNATE; else dev->field_cap = V4L2_FIELD_NONE; /* * We can be called from within s_ctrl, in that case we can't * set/get controls. Luckily we don't need to in that case. */ if (keep_controls || !dev->colorspace) break; if (bt->flags & V4L2_DV_FL_IS_CE_VIDEO) { if (bt->width == 720 && bt->height <= 576) v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); else v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_709); v4l2_ctrl_s_ctrl(dev->real_rgb_range_cap, 1); } else { v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); v4l2_ctrl_s_ctrl(dev->real_rgb_range_cap, 0); } tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap)); break; } vivid_update_quality(dev); tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; dev->crop_bounds_cap = dev->src_rect; dev->compose_cap = dev->crop_cap; if (V4L2_FIELD_HAS_T_OR_B(dev->field_cap)) dev->compose_cap.height /= 2; dev->fmt_cap_rect = dev->compose_cap; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); tpg_s_pixel_aspect(&dev->tpg, vivid_get_pixel_aspect(dev)); tpg_update_mv_step(&dev->tpg); /* * We can be called from within s_ctrl, in that case we can't * modify controls. Luckily we don't need to in that case. */ if (keep_controls) return; dims[0] = roundup(dev->src_rect.width, PIXEL_ARRAY_DIV); dims[1] = roundup(dev->src_rect.height, PIXEL_ARRAY_DIV); v4l2_ctrl_modify_dimensions(dev->pixel_array, dims); } /* Map the field to something that is valid for the current input */ static enum v4l2_field vivid_field_cap(struct vivid_dev *dev, enum v4l2_field field) { if (vivid_is_sdtv_cap(dev)) { switch (field) { case V4L2_FIELD_INTERLACED_TB: case V4L2_FIELD_INTERLACED_BT: case V4L2_FIELD_SEQ_TB: case V4L2_FIELD_SEQ_BT: case V4L2_FIELD_TOP: case V4L2_FIELD_BOTTOM: case V4L2_FIELD_ALTERNATE: return field; case V4L2_FIELD_INTERLACED: default: return V4L2_FIELD_INTERLACED; } } if (vivid_is_hdmi_cap(dev)) return dev->dv_timings_cap[dev->input].bt.interlaced ? V4L2_FIELD_ALTERNATE : V4L2_FIELD_NONE; return V4L2_FIELD_NONE; } static unsigned vivid_colorspace_cap(struct vivid_dev *dev) { if (!dev->loop_video || vivid_is_webcam(dev) || vivid_is_tv_cap(dev)) return tpg_g_colorspace(&dev->tpg); return dev->colorspace_out; } static unsigned vivid_xfer_func_cap(struct vivid_dev *dev) { if (!dev->loop_video || vivid_is_webcam(dev) || vivid_is_tv_cap(dev)) return tpg_g_xfer_func(&dev->tpg); return dev->xfer_func_out; } static unsigned vivid_ycbcr_enc_cap(struct vivid_dev *dev) { if (!dev->loop_video || vivid_is_webcam(dev) || vivid_is_tv_cap(dev)) return tpg_g_ycbcr_enc(&dev->tpg); return dev->ycbcr_enc_out; } static unsigned int vivid_hsv_enc_cap(struct vivid_dev *dev) { if (!dev->loop_video || vivid_is_webcam(dev) || vivid_is_tv_cap(dev)) return tpg_g_hsv_enc(&dev->tpg); return dev->hsv_enc_out; } static unsigned vivid_quantization_cap(struct vivid_dev *dev) { if (!dev->loop_video || vivid_is_webcam(dev) || vivid_is_tv_cap(dev)) return tpg_g_quantization(&dev->tpg); return dev->quantization_out; } int vivid_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; unsigned p; mp->width = dev->fmt_cap_rect.width; mp->height = dev->fmt_cap_rect.height; mp->field = dev->field_cap; mp->pixelformat = dev->fmt_cap->fourcc; mp->colorspace = vivid_colorspace_cap(dev); mp->xfer_func = vivid_xfer_func_cap(dev); if (dev->fmt_cap->color_enc == TGP_COLOR_ENC_HSV) mp->hsv_enc = vivid_hsv_enc_cap(dev); else mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); mp->quantization = vivid_quantization_cap(dev); mp->num_planes = dev->fmt_cap->buffers; for (p = 0; p < mp->num_planes; p++) { mp->plane_fmt[p].bytesperline = tpg_g_bytesperline(&dev->tpg, p); mp->plane_fmt[p].sizeimage = (tpg_g_line_width(&dev->tpg, p) * mp->height) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; } return 0; } int vivid_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct v4l2_plane_pix_format *pfmt = mp->plane_fmt; struct vivid_dev *dev = video_drvdata(file); const struct vivid_fmt *fmt; unsigned bytesperline, max_bpl; unsigned factor = 1; unsigned w, h; unsigned p; bool user_set_csc = !!(mp->flags & V4L2_PIX_FMT_FLAG_SET_CSC); fmt = vivid_get_format(dev, mp->pixelformat); if (!fmt) { dprintk(dev, 1, "Fourcc format (0x%08x) unknown.\n", mp->pixelformat); mp->pixelformat = V4L2_PIX_FMT_YUYV; fmt = vivid_get_format(dev, mp->pixelformat); } mp->field = vivid_field_cap(dev, mp->field); if (vivid_is_webcam(dev)) { const struct v4l2_frmsize_discrete *sz = v4l2_find_nearest_size(webcam_sizes, ARRAY_SIZE(webcam_sizes), width, height, mp->width, mp->height); w = sz->width; h = sz->height; } else if (vivid_is_sdtv_cap(dev)) { w = 720; h = (dev->std_cap[dev->input] & V4L2_STD_525_60) ? 480 : 576; } else { w = dev->src_rect.width; h = dev->src_rect.height; } if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; if (vivid_is_webcam(dev) || (!dev->has_scaler_cap && !dev->has_crop_cap && !dev->has_compose_cap)) { mp->width = w; mp->height = h / factor; } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height * factor }; v4l2_rect_set_min_size(&r, &vivid_min_rect); v4l2_rect_set_max_size(&r, &vivid_max_rect); if (dev->has_scaler_cap && !dev->has_compose_cap) { struct v4l2_rect max_r = { 0, 0, MAX_ZOOM * w, MAX_ZOOM * h }; v4l2_rect_set_max_size(&r, &max_r); } else if (!dev->has_scaler_cap && dev->has_crop_cap && !dev->has_compose_cap) { v4l2_rect_set_max_size(&r, &dev->src_rect); } else if (!dev->has_scaler_cap && !dev->has_crop_cap) { v4l2_rect_set_min_size(&r, &dev->src_rect); } mp->width = r.width; mp->height = r.height / factor; } /* This driver supports custom bytesperline values */ mp->num_planes = fmt->buffers; for (p = 0; p < fmt->buffers; p++) { /* Calculate the minimum supported bytesperline value */ bytesperline = (mp->width * fmt->bit_depth[p]) >> 3; /* Calculate the maximum supported bytesperline value */ max_bpl = (MAX_ZOOM * MAX_WIDTH * fmt->bit_depth[p]) >> 3; if (pfmt[p].bytesperline > max_bpl) pfmt[p].bytesperline = max_bpl; if (pfmt[p].bytesperline < bytesperline) pfmt[p].bytesperline = bytesperline; pfmt[p].sizeimage = (pfmt[p].bytesperline * mp->height) / fmt->vdownsampling[p] + fmt->data_offset[p]; memset(pfmt[p].reserved, 0, sizeof(pfmt[p].reserved)); } for (p = fmt->buffers; p < fmt->planes; p++) pfmt[0].sizeimage += (pfmt[0].bytesperline * mp->height * (fmt->bit_depth[p] / fmt->vdownsampling[p])) / (fmt->bit_depth[0] / fmt->vdownsampling[0]); if (!user_set_csc || !v4l2_is_colorspace_valid(mp->colorspace)) mp->colorspace = vivid_colorspace_cap(dev); if (!user_set_csc || !v4l2_is_xfer_func_valid(mp->xfer_func)) mp->xfer_func = vivid_xfer_func_cap(dev); if (fmt->color_enc == TGP_COLOR_ENC_HSV) { if (!user_set_csc || !v4l2_is_hsv_enc_valid(mp->hsv_enc)) mp->hsv_enc = vivid_hsv_enc_cap(dev); } else if (fmt->color_enc == TGP_COLOR_ENC_YCBCR) { if (!user_set_csc || !v4l2_is_ycbcr_enc_valid(mp->ycbcr_enc)) mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); } else { mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); } if (fmt->color_enc == TGP_COLOR_ENC_YCBCR || fmt->color_enc == TGP_COLOR_ENC_RGB) { if (!user_set_csc || !v4l2_is_quant_valid(mp->quantization)) mp->quantization = vivid_quantization_cap(dev); } else { mp->quantization = vivid_quantization_cap(dev); } memset(mp->reserved, 0, sizeof(mp->reserved)); return 0; } int vivid_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_cap; struct v4l2_rect *compose = &dev->compose_cap; struct vb2_queue *q = &dev->vb_vid_cap_q; int ret = vivid_try_fmt_vid_cap(file, priv, f); unsigned factor = 1; unsigned p; unsigned i; if (ret < 0) return ret; if (vb2_is_busy(q)) { dprintk(dev, 1, "%s device busy\n", __func__); return -EBUSY; } dev->fmt_cap = vivid_get_format(dev, mp->pixelformat); if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; /* Note: the webcam input doesn't support scaling, cropping or composing */ if (!vivid_is_webcam(dev) && (dev->has_scaler_cap || dev->has_crop_cap || dev->has_compose_cap)) { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; if (dev->has_scaler_cap) { if (dev->has_compose_cap) v4l2_rect_map_inside(compose, &r); else *compose = r; if (dev->has_crop_cap && !dev->has_compose_cap) { struct v4l2_rect min_r = { 0, 0, r.width / MAX_ZOOM, factor * r.height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, r.width * MAX_ZOOM, factor * r.height * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_r); v4l2_rect_set_max_size(crop, &max_r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } else if (dev->has_crop_cap) { struct v4l2_rect min_r = { 0, 0, compose->width / MAX_ZOOM, factor * compose->height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, compose->width * MAX_ZOOM, factor * compose->height * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_r); v4l2_rect_set_max_size(crop, &max_r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } } else if (dev->has_crop_cap && !dev->has_compose_cap) { r.height *= factor; v4l2_rect_set_size_to(crop, &r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); r = *crop; r.height /= factor; v4l2_rect_set_size_to(compose, &r); } else if (!dev->has_crop_cap) { v4l2_rect_map_inside(compose, &r); } else { r.height *= factor; v4l2_rect_set_max_size(crop, &r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); compose->top *= factor; compose->height *= factor; v4l2_rect_set_size_to(compose, crop); v4l2_rect_map_inside(compose, &r); compose->top /= factor; compose->height /= factor; } } else if (vivid_is_webcam(dev)) { unsigned int ival_sz = webcam_ival_count(dev, dev->webcam_size_idx); /* Guaranteed to be a match */ for (i = 0; i < ARRAY_SIZE(webcam_sizes); i++) if (webcam_sizes[i].width == mp->width && webcam_sizes[i].height == mp->height) break; dev->webcam_size_idx = i; if (dev->webcam_ival_idx >= ival_sz) dev->webcam_ival_idx = ival_sz - 1; vivid_update_format_cap(dev, false); } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; v4l2_rect_set_size_to(compose, &r); r.height *= factor; v4l2_rect_set_size_to(crop, &r); } dev->fmt_cap_rect.width = mp->width; dev->fmt_cap_rect.height = mp->height; tpg_s_buf_height(&dev->tpg, mp->height); tpg_s_fourcc(&dev->tpg, dev->fmt_cap->fourcc); for (p = 0; p < tpg_g_buffers(&dev->tpg); p++) tpg_s_bytesperline(&dev->tpg, p, mp->plane_fmt[p].bytesperline); dev->field_cap = mp->field; if (dev->field_cap == V4L2_FIELD_ALTERNATE) tpg_s_field(&dev->tpg, V4L2_FIELD_TOP, true); else tpg_s_field(&dev->tpg, dev->field_cap, false); tpg_s_crop_compose(&dev->tpg, &dev->crop_cap, &dev->compose_cap); if (vivid_is_sdtv_cap(dev)) dev->tv_field_cap = mp->field; tpg_update_mv_step(&dev->tpg); dev->tpg.colorspace = mp->colorspace; dev->tpg.xfer_func = mp->xfer_func; if (dev->fmt_cap->color_enc == TGP_COLOR_ENC_YCBCR) dev->tpg.ycbcr_enc = mp->ycbcr_enc; else dev->tpg.hsv_enc = mp->hsv_enc; dev->tpg.quantization = mp->quantization; return 0; } int vidioc_g_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_g_fmt_vid_cap(file, priv, f); } int vidioc_try_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_try_fmt_vid_cap(file, priv, f); } int vidioc_s_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_s_fmt_vid_cap(file, priv, f); } int vidioc_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_g_fmt_vid_cap); } int vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_try_fmt_vid_cap); } int vidioc_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_s_fmt_vid_cap); } int vivid_vid_cap_g_selection(struct file *file, void *priv, struct v4l2_selection *sel) { struct vivid_dev *dev = video_drvdata(file); if (!dev->has_crop_cap && !dev->has_compose_cap) return -ENOTTY; if (sel->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; if (vivid_is_webcam(dev)) return -ENODATA; sel->r.left = sel->r.top = 0; switch (sel->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_cap) return -EINVAL; sel->r = dev->crop_cap; break; case V4L2_SEL_TGT_CROP_DEFAULT: case V4L2_SEL_TGT_CROP_BOUNDS: if (!dev->has_crop_cap) return -EINVAL; sel->r = dev->src_rect; break; case V4L2_SEL_TGT_COMPOSE_BOUNDS: if (!dev->has_compose_cap) return -EINVAL; sel->r = vivid_max_rect; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_cap) return -EINVAL; sel->r = dev->compose_cap; break; case V4L2_SEL_TGT_COMPOSE_DEFAULT: if (!dev->has_compose_cap) return -EINVAL; sel->r = dev->fmt_cap_rect; break; default: return -EINVAL; } return 0; } int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection *s) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_cap; struct v4l2_rect *compose = &dev->compose_cap; unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_cap) ? 2 : 1; int ret; if (!dev->has_crop_cap && !dev->has_compose_cap) return -ENOTTY; if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; if (vivid_is_webcam(dev)) return -ENODATA; switch (s->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_cap) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->src_rect); v4l2_rect_map_inside(&s->r, &dev->crop_bounds_cap); s->r.top /= factor; s->r.height /= factor; if (dev->has_scaler_cap) { struct v4l2_rect fmt = dev->fmt_cap_rect; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, s->r.height * MAX_ZOOM }; struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, s->r.height / MAX_ZOOM }; v4l2_rect_set_min_size(&fmt, &min_rect); if (!dev->has_compose_cap) v4l2_rect_set_max_size(&fmt, &max_rect); if (!v4l2_rect_same_size(&dev->fmt_cap_rect, &fmt) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; if (dev->has_compose_cap) { v4l2_rect_set_min_size(compose, &min_rect); v4l2_rect_set_max_size(compose, &max_rect); v4l2_rect_map_inside(compose, &fmt); } dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); } else if (dev->has_compose_cap) { struct v4l2_rect fmt = dev->fmt_cap_rect; v4l2_rect_set_min_size(&fmt, &s->r); if (!v4l2_rect_same_size(&dev->fmt_cap_rect, &fmt) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); v4l2_rect_set_size_to(compose, &s->r); v4l2_rect_map_inside(compose, &dev->fmt_cap_rect); } else { if (!v4l2_rect_same_size(&s->r, &dev->fmt_cap_rect) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; v4l2_rect_set_size_to(&dev->fmt_cap_rect, &s->r); v4l2_rect_set_size_to(compose, &s->r); v4l2_rect_map_inside(compose, &dev->fmt_cap_rect); tpg_s_buf_height(&dev->tpg, dev->fmt_cap_rect.height); } s->r.top *= factor; s->r.height *= factor; *crop = s->r; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_cap) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->fmt_cap_rect); if (dev->has_scaler_cap) { struct v4l2_rect max_rect = { 0, 0, dev->src_rect.width * MAX_ZOOM, (dev->src_rect.height / factor) * MAX_ZOOM }; v4l2_rect_set_max_size(&s->r, &max_rect); if (dev->has_crop_cap) { struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, (s->r.height * factor) / MAX_ZOOM }; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, (s->r.height * factor) * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_rect); v4l2_rect_set_max_size(crop, &max_rect); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } } else if (dev->has_crop_cap) { s->r.top *= factor; s->r.height *= factor; v4l2_rect_set_max_size(&s->r, &dev->src_rect); v4l2_rect_set_size_to(crop, &s->r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); s->r.top /= factor; s->r.height /= factor; } else { v4l2_rect_set_size_to(&s->r, &dev->src_rect); s->r.height /= factor; } v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect); *compose = s->r; break; default: return -EINVAL; } tpg_s_crop_compose(&dev->tpg, crop, compose); return 0; } int vivid_vid_cap_g_pixelaspect(struct file *file, void *priv, int type, struct v4l2_fract *f) { struct vivid_dev *dev = video_drvdata(file); if (type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; switch (vivid_get_pixel_aspect(dev)) { case TPG_PIXEL_ASPECT_NTSC: f->numerator = 11; f->denominator = 10; break; case TPG_PIXEL_ASPECT_PAL: f->numerator = 54; f->denominator = 59; break; default: break; } return 0; } static const struct v4l2_audio vivid_audio_inputs[] = { { 0, "TV", V4L2_AUDCAP_STEREO }, { 1, "Line-In", V4L2_AUDCAP_STEREO }, }; int vidioc_enum_input(struct file *file, void *priv, struct v4l2_input *inp) { struct vivid_dev *dev = video_drvdata(file); if (inp->index >= dev->num_inputs) return -EINVAL; inp->type = V4L2_INPUT_TYPE_CAMERA; switch (dev->input_type[inp->index]) { case WEBCAM: snprintf(inp->name, sizeof(inp->name), "Webcam %u", dev->input_name_counter[inp->index]); inp->capabilities = 0; break; case TV: snprintf(inp->name, sizeof(inp->name), "TV %u", dev->input_name_counter[inp->index]); inp->type = V4L2_INPUT_TYPE_TUNER; inp->std = V4L2_STD_ALL; if (dev->has_audio_inputs) inp->audioset = (1 << ARRAY_SIZE(vivid_audio_inputs)) - 1; inp->capabilities = V4L2_IN_CAP_STD; break; case SVID: snprintf(inp->name, sizeof(inp->name), "S-Video %u", dev->input_name_counter[inp->index]); inp->std = V4L2_STD_ALL; if (dev->has_audio_inputs) inp->audioset = (1 << ARRAY_SIZE(vivid_audio_inputs)) - 1; inp->capabilities = V4L2_IN_CAP_STD; break; case HDMI: snprintf(inp->name, sizeof(inp->name), "HDMI %u", dev->input_name_counter[inp->index]); inp->capabilities = V4L2_IN_CAP_DV_TIMINGS; if (dev->edid_blocks == 0 || dev->dv_timings_signal_mode[dev->input] == NO_SIGNAL) inp->status |= V4L2_IN_ST_NO_SIGNAL; else if (dev->dv_timings_signal_mode[dev->input] == NO_LOCK || dev->dv_timings_signal_mode[dev->input] == OUT_OF_RANGE) inp->status |= V4L2_IN_ST_NO_H_LOCK; break; } if (dev->sensor_hflip) inp->status |= V4L2_IN_ST_HFLIP; if (dev->sensor_vflip) inp->status |= V4L2_IN_ST_VFLIP; if (dev->input == inp->index && vivid_is_sdtv_cap(dev)) { if (dev->std_signal_mode[dev->input] == NO_SIGNAL) { inp->status |= V4L2_IN_ST_NO_SIGNAL; } else if (dev->std_signal_mode[dev->input] == NO_LOCK) { inp->status |= V4L2_IN_ST_NO_H_LOCK; } else if (vivid_is_tv_cap(dev)) { switch (tpg_g_quality(&dev->tpg)) { case TPG_QUAL_GRAY: inp->status |= V4L2_IN_ST_COLOR_KILL; break; case TPG_QUAL_NOISE: inp->status |= V4L2_IN_ST_NO_H_LOCK; break; default: break; } } } return 0; } int vidioc_g_input(struct file *file, void *priv, unsigned *i) { struct vivid_dev *dev = video_drvdata(file); *i = dev->input; return 0; } int vidioc_s_input(struct file *file, void *priv, unsigned i) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_bt_timings *bt = &dev->dv_timings_cap[dev->input].bt; unsigned brightness; if (i >= dev->num_inputs) return -EINVAL; if (i == dev->input) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q) || vb2_is_busy(&dev->vb_vbi_cap_q) || vb2_is_busy(&dev->vb_meta_cap_q)) return -EBUSY; dev->input = i; dev->vid_cap_dev.tvnorms = 0; if (dev->input_type[i] == TV || dev->input_type[i] == SVID) { dev->tv_audio_input = (dev->input_type[i] == TV) ? 0 : 1; dev->vid_cap_dev.tvnorms = V4L2_STD_ALL; } dev->vbi_cap_dev.tvnorms = dev->vid_cap_dev.tvnorms; dev->meta_cap_dev.tvnorms = dev->vid_cap_dev.tvnorms; vivid_update_format_cap(dev, false); if (dev->colorspace) { switch (dev->input_type[i]) { case WEBCAM: v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); break; case TV: case SVID: v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); break; case HDMI: if (bt->flags & V4L2_DV_FL_IS_CE_VIDEO) { if (dev->src_rect.width == 720 && dev->src_rect.height <= 576) v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); else v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_709); } else { v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); } break; } } /* * Modify the brightness range depending on the input. * This makes it easy to use vivid to test if applications can * handle control range modifications and is also how this is * typically used in practice as different inputs may be hooked * up to different receivers with different control ranges. */ brightness = 128 * i + dev->input_brightness[i]; v4l2_ctrl_modify_range(dev->brightness, 128 * i, 255 + 128 * i, 1, 128 + 128 * i); v4l2_ctrl_s_ctrl(dev->brightness, brightness); /* Restore per-input states. */ v4l2_ctrl_activate(dev->ctrl_dv_timings_signal_mode, vivid_is_hdmi_cap(dev)); v4l2_ctrl_activate(dev->ctrl_dv_timings, vivid_is_hdmi_cap(dev) && dev->dv_timings_signal_mode[dev->input] == SELECTED_DV_TIMINGS); v4l2_ctrl_activate(dev->ctrl_std_signal_mode, vivid_is_sdtv_cap(dev)); v4l2_ctrl_activate(dev->ctrl_standard, vivid_is_sdtv_cap(dev) && dev->std_signal_mode[dev->input]); if (vivid_is_hdmi_cap(dev)) { v4l2_ctrl_s_ctrl(dev->ctrl_dv_timings_signal_mode, dev->dv_timings_signal_mode[dev->input]); v4l2_ctrl_s_ctrl(dev->ctrl_dv_timings, dev->query_dv_timings[dev->input]); } else if (vivid_is_sdtv_cap(dev)) { v4l2_ctrl_s_ctrl(dev->ctrl_std_signal_mode, dev->std_signal_mode[dev->input]); v4l2_ctrl_s_ctrl(dev->ctrl_standard, dev->std_signal_mode[dev->input]); } return 0; } int vidioc_enumaudio(struct file *file, void *fh, struct v4l2_audio *vin) { if (vin->index >= ARRAY_SIZE(vivid_audio_inputs)) return -EINVAL; *vin = vivid_audio_inputs[vin->index]; return 0; } int vidioc_g_audio(struct file *file, void *fh, struct v4l2_audio *vin) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; *vin = vivid_audio_inputs[dev->tv_audio_input]; return 0; } int vidioc_s_audio(struct file *file, void *fh, const struct v4l2_audio *vin) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; if (vin->index >= ARRAY_SIZE(vivid_audio_inputs)) return -EINVAL; dev->tv_audio_input = vin->index; return 0; } int vivid_video_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); if (vf->tuner != 0) return -EINVAL; vf->frequency = dev->tv_freq; return 0; } int vivid_video_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); if (vf->tuner != 0) return -EINVAL; dev->tv_freq = clamp_t(unsigned, vf->frequency, MIN_TV_FREQ, MAX_TV_FREQ); if (vivid_is_tv_cap(dev)) vivid_update_quality(dev); return 0; } int vivid_video_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { struct vivid_dev *dev = video_drvdata(file); if (vt->index != 0) return -EINVAL; if (vt->audmode > V4L2_TUNER_MODE_LANG1_LANG2) return -EINVAL; dev->tv_audmode = vt->audmode; return 0; } int vivid_video_g_tuner(struct file *file, void *fh, struct v4l2_tuner *vt) { struct vivid_dev *dev = video_drvdata(file); enum tpg_quality qual; if (vt->index != 0) return -EINVAL; vt->capability = V4L2_TUNER_CAP_NORM | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_LANG1 | V4L2_TUNER_CAP_LANG2; vt->audmode = dev->tv_audmode; vt->rangelow = MIN_TV_FREQ; vt->rangehigh = MAX_TV_FREQ; qual = vivid_get_quality(dev, &vt->afc); if (qual == TPG_QUAL_COLOR) vt->signal = 0xffff; else if (qual == TPG_QUAL_GRAY) vt->signal = 0x8000; else vt->signal = 0; if (qual == TPG_QUAL_NOISE) { vt->rxsubchans = 0; } else if (qual == TPG_QUAL_GRAY) { vt->rxsubchans = V4L2_TUNER_SUB_MONO; } else { unsigned int channel_nr = dev->tv_freq / (6 * 16); unsigned int options = (dev->std_cap[dev->input] & V4L2_STD_NTSC_M) ? 4 : 3; switch (channel_nr % options) { case 0: vt->rxsubchans = V4L2_TUNER_SUB_MONO; break; case 1: vt->rxsubchans = V4L2_TUNER_SUB_STEREO; break; case 2: if (dev->std_cap[dev->input] & V4L2_STD_NTSC_M) vt->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_SAP; else vt->rxsubchans = V4L2_TUNER_SUB_LANG1 | V4L2_TUNER_SUB_LANG2; break; case 3: vt->rxsubchans = V4L2_TUNER_SUB_STEREO | V4L2_TUNER_SUB_SAP; break; } } strscpy(vt->name, "TV Tuner", sizeof(vt->name)); return 0; } /* Must remain in sync with the vivid_ctrl_standard_strings array */ const v4l2_std_id vivid_standard[] = { V4L2_STD_NTSC_M, V4L2_STD_NTSC_M_JP, V4L2_STD_NTSC_M_KR, V4L2_STD_NTSC_443, V4L2_STD_PAL_BG | V4L2_STD_PAL_H, V4L2_STD_PAL_I, V4L2_STD_PAL_DK, V4L2_STD_PAL_M, V4L2_STD_PAL_N, V4L2_STD_PAL_Nc, V4L2_STD_PAL_60, V4L2_STD_SECAM_B | V4L2_STD_SECAM_G | V4L2_STD_SECAM_H, V4L2_STD_SECAM_DK, V4L2_STD_SECAM_L, V4L2_STD_SECAM_LC, V4L2_STD_UNKNOWN }; /* Must remain in sync with the vivid_standard array */ const char * const vivid_ctrl_standard_strings[] = { "NTSC-M", "NTSC-M-JP", "NTSC-M-KR", "NTSC-443", "PAL-BGH", "PAL-I", "PAL-DK", "PAL-M", "PAL-N", "PAL-Nc", "PAL-60", "SECAM-BGH", "SECAM-DK", "SECAM-L", "SECAM-Lc", NULL, }; int vidioc_querystd(struct file *file, void *priv, v4l2_std_id *id) { struct vivid_dev *dev = video_drvdata(file); unsigned int last = dev->query_std_last[dev->input]; if (!vivid_is_sdtv_cap(dev)) return -ENODATA; if (dev->std_signal_mode[dev->input] == NO_SIGNAL || dev->std_signal_mode[dev->input] == NO_LOCK) { *id = V4L2_STD_UNKNOWN; return 0; } if (vivid_is_tv_cap(dev) && tpg_g_quality(&dev->tpg) == TPG_QUAL_NOISE) { *id = V4L2_STD_UNKNOWN; } else if (dev->std_signal_mode[dev->input] == CURRENT_STD) { *id = dev->std_cap[dev->input]; } else if (dev->std_signal_mode[dev->input] == SELECTED_STD) { *id = dev->query_std[dev->input]; } else { *id = vivid_standard[last]; dev->query_std_last[dev->input] = (last + 1) % ARRAY_SIZE(vivid_standard); } return 0; } int vivid_vid_cap_s_std(struct file *file, void *priv, v4l2_std_id id) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -ENODATA; if (dev->std_cap[dev->input] == id) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q) || vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; dev->std_cap[dev->input] = id; vivid_update_format_cap(dev, false); return 0; } static void find_aspect_ratio(u32 width, u32 height, u32 *num, u32 *denom) { if (!(height % 3) && ((height * 4 / 3) == width)) { *num = 4; *denom = 3; } else if (!(height % 9) && ((height * 16 / 9) == width)) { *num = 16; *denom = 9; } else if (!(height % 10) && ((height * 16 / 10) == width)) { *num = 16; *denom = 10; } else if (!(height % 4) && ((height * 5 / 4) == width)) { *num = 5; *denom = 4; } else if (!(height % 9) && ((height * 15 / 9) == width)) { *num = 15; *denom = 9; } else { /* default to 16:9 */ *num = 16; *denom = 9; } } static bool valid_cvt_gtf_timings(struct v4l2_dv_timings *timings) { struct v4l2_bt_timings *bt = &timings->bt; u32 total_h_pixel; u32 total_v_lines; u32 h_freq; if (!v4l2_valid_dv_timings(timings, &vivid_dv_timings_cap, NULL, NULL)) return false; total_h_pixel = V4L2_DV_BT_FRAME_WIDTH(bt); total_v_lines = V4L2_DV_BT_FRAME_HEIGHT(bt); h_freq = (u32)bt->pixelclock / total_h_pixel; if (bt->standards == 0 || (bt->standards & V4L2_DV_BT_STD_CVT)) { if (v4l2_detect_cvt(total_v_lines, h_freq, bt->vsync, bt->width, bt->polarities, bt->interlaced, timings)) return true; } if (bt->standards == 0 || (bt->standards & V4L2_DV_BT_STD_GTF)) { struct v4l2_fract aspect_ratio; find_aspect_ratio(bt->width, bt->height, &aspect_ratio.numerator, &aspect_ratio.denominator); if (v4l2_detect_gtf(total_v_lines, h_freq, bt->vsync, bt->polarities, bt->interlaced, aspect_ratio, timings)) return true; } return false; } int vivid_vid_cap_s_dv_timings(struct file *file, void *_fh, struct v4l2_dv_timings *timings) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_hdmi_cap(dev)) return -ENODATA; if (!v4l2_find_dv_timings_cap(timings, &vivid_dv_timings_cap, 0, NULL, NULL) && !valid_cvt_gtf_timings(timings)) return -EINVAL; if (v4l2_match_dv_timings(timings, &dev->dv_timings_cap[dev->input], 0, false)) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->dv_timings_cap[dev->input] = *timings; vivid_update_format_cap(dev, false); return 0; } int vidioc_query_dv_timings(struct file *file, void *_fh, struct v4l2_dv_timings *timings) { struct vivid_dev *dev = video_drvdata(file); unsigned int input = dev->input; unsigned int last = dev->query_dv_timings_last[input]; if (!vivid_is_hdmi_cap(dev)) return -ENODATA; if (dev->dv_timings_signal_mode[input] == NO_SIGNAL || dev->edid_blocks == 0) return -ENOLINK; if (dev->dv_timings_signal_mode[input] == NO_LOCK) return -ENOLCK; if (dev->dv_timings_signal_mode[input] == OUT_OF_RANGE) { timings->bt.pixelclock = vivid_dv_timings_cap.bt.max_pixelclock * 2; return -ERANGE; } if (dev->dv_timings_signal_mode[input] == CURRENT_DV_TIMINGS) { *timings = dev->dv_timings_cap[input]; } else if (dev->dv_timings_signal_mode[input] == SELECTED_DV_TIMINGS) { *timings = v4l2_dv_timings_presets[dev->query_dv_timings[input]]; } else { *timings = v4l2_dv_timings_presets[last]; dev->query_dv_timings_last[input] = (last + 1) % dev->query_dv_timings_size; } return 0; } int vidioc_s_edid(struct file *file, void *_fh, struct v4l2_edid *edid) { struct vivid_dev *dev = video_drvdata(file); u16 phys_addr; u32 display_present = 0; unsigned int i, j; int ret; memset(edid->reserved, 0, sizeof(edid->reserved)); if (edid->pad >= dev->num_inputs) return -EINVAL; if (dev->input_type[edid->pad] != HDMI || edid->start_block) return -EINVAL; if (edid->blocks == 0) { dev->edid_blocks = 0; v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, 0); v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, 0); phys_addr = CEC_PHYS_ADDR_INVALID; goto set_phys_addr; } if (edid->blocks > dev->edid_max_blocks) { edid->blocks = dev->edid_max_blocks; return -E2BIG; } phys_addr = cec_get_edid_phys_addr(edid->edid, edid->blocks * 128, NULL); ret = v4l2_phys_addr_validate(phys_addr, &phys_addr, NULL); if (ret) return ret; if (vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->edid_blocks = edid->blocks; memcpy(dev->edid, edid->edid, edid->blocks * 128); for (i = 0, j = 0; i < dev->num_outputs; i++) if (dev->output_type[i] == HDMI) display_present |= dev->display_present[i] << j++; v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, display_present); v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, display_present); set_phys_addr: /* TODO: a proper hotplug detect cycle should be emulated here */ cec_s_phys_addr(dev->cec_rx_adap, phys_addr, false); for (i = 0; i < MAX_OUTPUTS && dev->cec_tx_adap[i]; i++) cec_s_phys_addr(dev->cec_tx_adap[i], dev->display_present[i] ? v4l2_phys_addr_for_input(phys_addr, i + 1) : CEC_PHYS_ADDR_INVALID, false); return 0; } int vidioc_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_webcam(dev) && !dev->has_scaler_cap) return -EINVAL; if (vivid_get_format(dev, fsize->pixel_format) == NULL) return -EINVAL; if (vivid_is_webcam(dev)) { if (fsize->index >= ARRAY_SIZE(webcam_sizes)) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; fsize->discrete = webcam_sizes[fsize->index]; return 0; } if (fsize->index) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; fsize->stepwise.min_width = MIN_WIDTH; fsize->stepwise.max_width = MAX_WIDTH * MAX_ZOOM; fsize->stepwise.step_width = 2; fsize->stepwise.min_height = MIN_HEIGHT; fsize->stepwise.max_height = MAX_HEIGHT * MAX_ZOOM; fsize->stepwise.step_height = 2; return 0; } /* timeperframe is arbitrary and continuous */ int vidioc_enum_frameintervals(struct file *file, void *priv, struct v4l2_frmivalenum *fival) { struct vivid_dev *dev = video_drvdata(file); const struct vivid_fmt *fmt; int i; fmt = vivid_get_format(dev, fival->pixel_format); if (!fmt) return -EINVAL; if (!vivid_is_webcam(dev)) { if (fival->index) return -EINVAL; if (fival->width < MIN_WIDTH || fival->width > MAX_WIDTH * MAX_ZOOM) return -EINVAL; if (fival->height < MIN_HEIGHT || fival->height > MAX_HEIGHT * MAX_ZOOM) return -EINVAL; fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; fival->discrete = dev->timeperframe_vid_cap; return 0; } for (i = 0; i < ARRAY_SIZE(webcam_sizes); i++) if (fival->width == webcam_sizes[i].width && fival->height == webcam_sizes[i].height) break; if (i == ARRAY_SIZE(webcam_sizes)) return -EINVAL; if (fival->index >= webcam_ival_count(dev, i)) return -EINVAL; fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; fival->discrete = webcam_intervals[fival->index]; return 0; } int vivid_vid_cap_g_parm(struct file *file, void *priv, struct v4l2_streamparm *parm) { struct vivid_dev *dev = video_drvdata(file); if (parm->type != (dev->multiplanar ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE)) return -EINVAL; parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; parm->parm.capture.timeperframe = dev->timeperframe_vid_cap; parm->parm.capture.readbuffers = 1; return 0; } int vivid_vid_cap_s_parm(struct file *file, void *priv, struct v4l2_streamparm *parm) { struct vivid_dev *dev = video_drvdata(file); unsigned int ival_sz = webcam_ival_count(dev, dev->webcam_size_idx); struct v4l2_fract tpf; unsigned i; if (parm->type != (dev->multiplanar ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE)) return -EINVAL; if (!vivid_is_webcam(dev)) return vivid_vid_cap_g_parm(file, priv, parm); tpf = parm->parm.capture.timeperframe; if (tpf.denominator == 0) tpf = webcam_intervals[ival_sz - 1]; for (i = 0; i < ival_sz; i++) if (V4L2_FRACT_COMPARE(tpf, >=, webcam_intervals[i])) break; if (i == ival_sz) i = ival_sz - 1; dev->webcam_ival_idx = i; tpf = webcam_intervals[dev->webcam_ival_idx]; /* resync the thread's timings */ dev->cap_seq_resync = true; dev->timeperframe_vid_cap = tpf; parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; parm->parm.capture.timeperframe = tpf; parm->parm.capture.readbuffers = 1; return 0; } |
9 8 16 1 2 4 9 10 10 1 6 2 1 1 6 6 2 4 6 1 2 1 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2016 Facebook */ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> #include <linux/kernel.h> #include <linux/stacktrace.h> #include <linux/perf_event.h> #include <linux/btf_ids.h> #include <linux/buildid.h> #include "percpu_freelist.h" #include "mmap_unlock_work.h" #define STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ BPF_F_STACK_BUILD_ID) struct stack_map_bucket { struct pcpu_freelist_node fnode; u32 hash; u32 nr; u64 data[]; }; struct bpf_stack_map { struct bpf_map map; void *elems; struct pcpu_freelist freelist; u32 n_buckets; struct stack_map_bucket *buckets[] __counted_by(n_buckets); }; static inline bool stack_map_use_build_id(struct bpf_map *map) { return (map->map_flags & BPF_F_STACK_BUILD_ID); } static inline int stack_map_data_size(struct bpf_map *map) { return stack_map_use_build_id(map) ? sizeof(struct bpf_stack_build_id) : sizeof(u64); } static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { u64 elem_size = sizeof(struct stack_map_bucket) + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, smap->map.numa_node); if (!smap->elems) return -ENOMEM; err = pcpu_freelist_init(&smap->freelist); if (err) goto free_elems; pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, smap->map.max_entries); return 0; free_elems: bpf_map_area_free(smap->elems); return err; } /* Called from syscall */ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_stack_map *smap; u64 cost, n_buckets; int err; if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || value_size < 8 || value_size % 8) return ERR_PTR(-EINVAL); BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); if (attr->map_flags & BPF_F_STACK_BUILD_ID) { if (value_size % sizeof(struct bpf_stack_build_id) || value_size / sizeof(struct bpf_stack_build_id) > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); if (!n_buckets) return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); smap->n_buckets = n_buckets; err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) goto free_smap; err = prealloc_elems_and_freelist(smap); if (err) goto put_buffers; return &smap->map; put_buffers: put_callchain_buffers(); free_smap: bpf_map_area_free(smap); return ERR_PTR(err); } static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u64 *ips, u32 trace_nr, bool user) { int i; struct mmap_unlock_irq_work *work = NULL; bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); struct vm_area_struct *vma, *prev_vma = NULL; const char *prev_build_id; /* If the irq_work is in use, fall back to report ips. Same * fallback is used for kernel stack (!user) on a stackmap with * build_id. */ if (!user || !current || !current->mm || irq_work_busy || !mmap_read_trylock(current->mm)) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); } return; } for (i = 0; i < trace_nr; i++) { if (range_in_vma(prev_vma, ips[i], ips[i])) { vma = prev_vma; memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX); goto build_id_valid; } vma = find_vma(current->mm, ips[i]); if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) { /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); continue; } build_id_valid: id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] - vma->vm_start; id_offs[i].status = BPF_STACK_BUILD_ID_VALID; prev_vma = vma; prev_build_id = id_offs[i].build_id; } bpf_mmap_unlock_mm(work, current->mm); } static struct perf_callchain_entry * get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) { #ifdef CONFIG_STACKTRACE struct perf_callchain_entry *entry; int rctx; entry = get_callchain_entry(&rctx); if (!entry) return NULL; entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, max_depth, 0); /* stack_trace_save_tsk() works on unsigned long array, while * perf_callchain_entry uses u64 array. For 32-bit systems, it is * necessary to fix this mismatch. */ if (__BITS_PER_LONG != 64) { unsigned long *from = (unsigned long *) entry->ip; u64 *to = entry->ip; int i; /* copy data from the end to avoid using extra buffer */ for (i = entry->nr - 1; i >= 0; i--) to[i] = (u64)(from[i]); } put_callchain_entry(rctx); return entry; #else /* CONFIG_STACKTRACE */ return NULL; #endif } static long __bpf_get_stackid(struct bpf_map *map, struct perf_callchain_entry *trace, u64 flags) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *new_bucket, *old_bucket; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; u64 *ips; bool hash_matches; if (trace->nr <= skip) /* skipping more than usable stack trace */ return -EFAULT; trace_nr = trace->nr - skip; trace_len = trace_nr * sizeof(u64); ips = trace->ip + skip; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); bucket = READ_ONCE(smap->buckets[id]); hash_matches = bucket && bucket->hash == hash; /* fast cmp */ if (hash_matches && flags & BPF_F_FAST_STACK_CMP) return id; if (stack_map_use_build_id(map)) { /* for build_id+offset, pop a bucket before slow cmp */ new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; new_bucket->nr = trace_nr; stack_map_get_build_id_offset( (struct bpf_stack_build_id *)new_bucket->data, ips, trace_nr, user); trace_len = trace_nr * sizeof(struct bpf_stack_build_id); if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, new_bucket->data, trace_len) == 0) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return id; } if (bucket && !(flags & BPF_F_REUSE_STACKID)) { pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); return -EEXIST; } } else { if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, ips, trace_len) == 0) return id; if (bucket && !(flags & BPF_F_REUSE_STACKID)) return -EEXIST; new_bucket = (struct stack_map_bucket *) pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; memcpy(new_bucket->data, ips, trace_len); } new_bucket->hash = hash; new_bucket->nr = trace_nr; old_bucket = xchg(&smap->buckets[id], new_bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return id; } BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u64, flags) { u32 max_depth = map->value_size / stack_map_data_size(map); u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; max_depth += skip; if (max_depth > sysctl_perf_event_max_stack) max_depth = sysctl_perf_event_max_stack; trace = get_perf_callchain(regs, 0, kernel, user, max_depth, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ return -EFAULT; return __bpf_get_stackid(map, trace, flags); } const struct bpf_func_proto bpf_get_stackid_proto = { .func = bpf_get_stackid, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; static __u64 count_kernel_ip(struct perf_callchain_entry *trace) { __u64 nr_kernel = 0; while (nr_kernel < trace->nr) { if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) break; nr_kernel++; } return nr_kernel; } BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, struct bpf_map *, map, u64, flags) { struct perf_event *event = ctx->event; struct perf_callchain_entry *trace; bool kernel, user; __u64 nr_kernel; int ret; /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) return bpf_get_stackid((unsigned long)(ctx->regs), (unsigned long) map, flags, 0, 0); if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) return -EINVAL; user = flags & BPF_F_USER_STACK; kernel = !user; trace = ctx->data->callchain; if (unlikely(!trace)) return -EFAULT; nr_kernel = count_kernel_ip(trace); if (kernel) { __u64 nr = trace->nr; trace->nr = nr_kernel; ret = __bpf_get_stackid(map, trace, flags); /* restore nr */ trace->nr = nr; } else { /* user */ u64 skip = flags & BPF_F_SKIP_FIELD_MASK; skip += nr_kernel; if (skip > BPF_F_SKIP_FIELD_MASK) return -EFAULT; flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; ret = __bpf_get_stackid(map, trace, flags); } return ret; } const struct bpf_func_proto bpf_get_stackid_proto_pe = { .func = bpf_get_stackid_pe, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, struct perf_callchain_entry *trace_in, void *buf, u32 size, u64 flags) { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; int err = -EINVAL; u64 *ips; if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))) goto clear; if (kernel && user_build_id) goto clear; elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) : sizeof(u64); if (unlikely(size % elem_size)) goto clear; /* cannot get valid user stack for task without user_mode regs */ if (task && user && !user_mode(regs)) goto err_fault; /* get_perf_callchain does not support crosstask user stack walking * but returns an empty stack instead of NULL. */ if (crosstask && user) { err = -EOPNOTSUPP; goto clear; } num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) max_depth = sysctl_perf_event_max_stack; if (trace_in) trace = trace_in; else if (kernel && task) trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, crosstask, false); if (unlikely(!trace)) goto err_fault; if (trace->nr < skip) goto err_fault; trace_nr = trace->nr - skip; trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; copy_len = trace_nr * elem_size; ips = trace->ip + skip; if (user && user_build_id) stack_map_get_build_id_offset(buf, ips, trace_nr, user); else memcpy(buf, ips, copy_len); if (size > copy_len) memset(buf + copy_len, 0, size - copy_len); return copy_len; err_fault: err = -EFAULT; clear: memset(buf, 0, size); return err; } BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, u64, flags) { return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); } const struct bpf_func_proto bpf_get_stack_proto = { .func = bpf_get_stack, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { struct pt_regs *regs; long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); if (regs) res = __bpf_get_stack(regs, task, NULL, buf, size, flags); put_task_stack(task); return res; } const struct bpf_func_proto bpf_get_task_stack_proto = { .func = bpf_get_task_stack, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, void *, buf, u32, size, u64, flags) { struct pt_regs *regs = (struct pt_regs *)(ctx->regs); struct perf_event *event = ctx->event; struct perf_callchain_entry *trace; bool kernel, user; int err = -EINVAL; __u64 nr_kernel; if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))) goto clear; user = flags & BPF_F_USER_STACK; kernel = !user; err = -EFAULT; trace = ctx->data->callchain; if (unlikely(!trace)) goto clear; nr_kernel = count_kernel_ip(trace); if (kernel) { __u64 nr = trace->nr; trace->nr = nr_kernel; err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); /* restore nr */ trace->nr = nr; } else { /* user */ u64 skip = flags & BPF_F_SKIP_FIELD_MASK; skip += nr_kernel; if (skip > BPF_F_SKIP_FIELD_MASK) goto clear; flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); } return err; clear: memset(buf, 0, size); return err; } const struct bpf_func_proto bpf_get_stack_proto_pe = { .func = bpf_get_stack_pe, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; /* Called from eBPF program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } /* Called from syscall */ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *old_bucket; u32 id = *(u32 *)key, trace_len; if (unlikely(id >= smap->n_buckets)) return -ENOENT; bucket = xchg(&smap->buckets[id], NULL); if (!bucket) return -ENOENT; trace_len = bucket->nr * stack_map_data_size(map); memcpy(value, bucket->data, trace_len); memset(value + trace_len, 0, map->value_size - trace_len); old_bucket = xchg(&smap->buckets[id], bucket); if (old_bucket) pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); u32 id; WARN_ON_ONCE(!rcu_read_lock_held()); if (!key) { id = 0; } else { id = *(u32 *)key; if (id >= smap->n_buckets || !smap->buckets[id]) id = 0; else id++; } while (id < smap->n_buckets && !smap->buckets[id]) id++; if (id >= smap->n_buckets) return -ENOENT; *(u32 *)next_key = id; return 0; } static long stack_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ static long stack_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *old_bucket; u32 id = *(u32 *)key; if (unlikely(id >= smap->n_buckets)) return -E2BIG; old_bucket = xchg(&smap->buckets[id], NULL); if (old_bucket) { pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } else { return -ENOENT; } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void stack_map_free(struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); bpf_map_area_free(smap); put_callchain_buffers(); } static u64 stack_map_mem_usage(const struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); u64 value_size = map->value_size; u64 n_buckets = smap->n_buckets; u64 enties = map->max_entries; u64 usage = sizeof(*smap); usage += n_buckets * sizeof(struct stack_map_bucket *); usage += enties * (sizeof(struct stack_map_bucket) + value_size); return usage; } BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map) const struct bpf_map_ops stack_trace_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = stack_map_alloc, .map_free = stack_map_free, .map_get_next_key = stack_map_get_next_key, .map_lookup_elem = stack_map_lookup_elem, .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, .map_mem_usage = stack_map_mem_usage, .map_btf_id = &stack_trace_map_btf_ids[0], }; |
1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 | // SPDX-License-Identifier: GPL-2.0 /* * Simple file system for zoned block devices exposing zones as files. * * Copyright (C) 2019 Western Digital Corporation or its affiliates. */ #include <linux/module.h> #include <linux/pagemap.h> #include <linux/magic.h> #include <linux/iomap.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/statfs.h> #include <linux/writeback.h> #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/parser.h> #include <linux/uio.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/crc32.h> #include <linux/task_io_accounting_ops.h> #include "zonefs.h" #define CREATE_TRACE_POINTS #include "trace.h" /* * Get the name of a zone group directory. */ static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) { switch (ztype) { case ZONEFS_ZTYPE_CNV: return "cnv"; case ZONEFS_ZTYPE_SEQ: return "seq"; default: WARN_ON_ONCE(1); return "???"; } } /* * Manage the active zone count. */ static void zonefs_account_active(struct super_block *sb, struct zonefs_zone *z) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); if (zonefs_zone_is_cnv(z)) return; /* * For zones that transitioned to the offline or readonly condition, * we only need to clear the active state. */ if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) goto out; /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ if ((z->z_flags & ZONEFS_ZONE_OPEN) || (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { z->z_flags |= ZONEFS_ZONE_ACTIVE; atomic_inc(&sbi->s_active_seq_files); } return; } out: /* The zone is not active. If it was, update the active count */ if (z->z_flags & ZONEFS_ZONE_ACTIVE) { z->z_flags &= ~ZONEFS_ZONE_ACTIVE; atomic_dec(&sbi->s_active_seq_files); } } /* * Manage the active zone count. Called with zi->i_truncate_mutex held. */ void zonefs_inode_account_active(struct inode *inode) { lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); } /* * Execute a zone management operation. */ static int zonefs_zone_mgmt(struct super_block *sb, struct zonefs_zone *z, enum req_op op) { int ret; /* * With ZNS drives, closing an explicitly open zone that has not been * written will change the zone state to "closed", that is, the zone * will remain active. Since this can then cause failure of explicit * open operation on other zones if the drive active zone resources * are exceeded, make sure that the zone does not remain active by * resetting it. */ if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) op = REQ_OP_ZONE_RESET; trace_zonefs_zone_mgmt(sb, z, op); ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, z->z_size >> SECTOR_SHIFT, GFP_NOFS); if (ret) { zonefs_err(sb, "Zone management operation %s at %llu failed %d\n", blk_op_str(op), z->z_sector, ret); return ret; } return 0; } int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) { lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); } void zonefs_i_size_write(struct inode *inode, loff_t isize) { struct zonefs_zone *z = zonefs_inode_zone(inode); i_size_write(inode, isize); /* * A full zone is no longer open/active and does not need * explicit closing. */ if (isize >= z->z_capacity) { struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); if (z->z_flags & ZONEFS_ZONE_ACTIVE) atomic_dec(&sbi->s_active_seq_files); z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); } } void zonefs_update_stats(struct inode *inode, loff_t new_isize) { struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); loff_t old_isize = i_size_read(inode); loff_t nr_blocks; if (new_isize == old_isize) return; spin_lock(&sbi->s_lock); /* * This may be called for an update after an IO error. * So beware of the values seen. */ if (new_isize < old_isize) { nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; if (sbi->s_used_blocks > nr_blocks) sbi->s_used_blocks -= nr_blocks; else sbi->s_used_blocks = 0; } else { sbi->s_used_blocks += (new_isize - old_isize) >> sb->s_blocksize_bits; if (sbi->s_used_blocks > sbi->s_blocks) sbi->s_used_blocks = sbi->s_blocks; } spin_unlock(&sbi->s_lock); } /* * Check a zone condition. Return the amount of written (and still readable) * data in the zone. */ static loff_t zonefs_check_zone_condition(struct super_block *sb, struct zonefs_zone *z, struct blk_zone *zone) { switch (zone->cond) { case BLK_ZONE_COND_OFFLINE: zonefs_warn(sb, "Zone %llu: offline zone\n", z->z_sector); z->z_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* * The write pointer of read-only zones is invalid, so we cannot * determine the zone wpoffset (inode size). We thus keep the * zone wpoffset as is, which leads to an empty file * (wpoffset == 0) on mount. For a runtime error, this keeps * the inode size as it was when last updated so that the user * can recover data. */ zonefs_warn(sb, "Zone %llu: read-only zone\n", z->z_sector); z->z_flags |= ZONEFS_ZONE_READONLY; if (zonefs_zone_is_cnv(z)) return z->z_capacity; return z->z_wpoffset; case BLK_ZONE_COND_FULL: /* The write pointer of full zones is invalid. */ return z->z_capacity; default: if (zonefs_zone_is_cnv(z)) return z->z_capacity; return (zone->wp - zone->start) << SECTOR_SHIFT; } } /* * Check a zone condition and adjust its inode access permissions for * offline and readonly zones. */ static void zonefs_inode_update_mode(struct inode *inode) { struct zonefs_zone *z = zonefs_inode_zone(inode); if (z->z_flags & ZONEFS_ZONE_OFFLINE) { /* Offline zones cannot be read nor written */ inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; } else if (z->z_flags & ZONEFS_ZONE_READONLY) { /* Readonly zones cannot be written */ inode->i_flags |= S_IMMUTABLE; if (z->z_flags & ZONEFS_ZONE_INIT_MODE) inode->i_mode &= ~0777; else inode->i_mode &= ~0222; } z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; z->z_mode = inode->i_mode; } static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *z = data; *z = *zone; return 0; } static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); loff_t isize, data_size; /* * Check the zone condition: if the zone is not "bad" (offline or * read-only), read errors are simply signaled to the IO issuer as long * as there is no inconsistency between the inode size and the amount of * data writen in the zone (data_size). */ data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && !write && isize == data_size) return; /* * At this point, we detected either a bad zone or an inconsistency * between the inode size and the amount of data written in the zone. * For the latter case, the cause may be a write IO error or an external * action on the device. Two error patterns exist: * 1) The inode size is lower than the amount of data in the zone: * a write operation partially failed and data was writen at the end * of the file. This can happen in the case of a large direct IO * needing several BIOs and/or write requests to be processed. * 2) The inode size is larger than the amount of data in the zone: * this can happen with a deferred write error with the use of the * device side write cache after getting successful write IO * completions. Other possibilities are (a) an external corruption, * e.g. an application reset the zone directly, or (b) the device * has a serious problem (e.g. firmware bug). * * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); /* * First handle bad zones signaled by hardware. The mount options * errors=zone-ro and errors=zone-offline result in changing the * zone condition to read-only and offline respectively, as if the * condition was signaled by the hardware. */ if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { zonefs_warn(sb, "inode %lu: read/write access disabled\n", inode->i_ino); if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) z->z_flags |= ZONEFS_ZONE_OFFLINE; zonefs_inode_update_mode(inode); data_size = 0; } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { zonefs_warn(sb, "inode %lu: write access disabled\n", inode->i_ino); if (!(z->z_flags & ZONEFS_ZONE_READONLY)) z->z_flags |= ZONEFS_ZONE_READONLY; zonefs_inode_update_mode(inode); data_size = isize; } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && data_size > isize) { /* Do not expose garbage data */ data_size = isize; } /* * If the filesystem is mounted with the explicit-open mount option, we * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to * the read-only or offline condition, to avoid attempting an explicit * close of the zone when the inode file is closed. */ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) z->z_flags &= ~ZONEFS_ZONE_OPEN; /* * If error=remount-ro was specified, any error result in remounting * the volume as read-only. */ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { zonefs_warn(sb, "remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } /* * Update block usage stats and the inode size to prevent access to * invalid data. */ zonefs_update_stats(inode, data_size); zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); } /* * When an file IO error occurs, check the file zone to see if there is a change * in the zone condition (e.g. offline or read-only). For a failed write to a * sequential zone, the zone write pointer position must also be checked to * eventually correct the file size and zonefs inode write pointer offset * (which can be out of sync with the drive due to partial write failures). */ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; unsigned int noio_flag; struct blk_zone zone; int ret; /* * Conventional zone have no write pointer and cannot become read-only * or offline. So simply fake a report for a single or aggregated zone * and let zonefs_handle_io_error() correct the zone inode information * according to the mount options. */ if (!zonefs_zone_is_seq(z)) { zone.start = z->z_sector; zone.len = z->z_size >> SECTOR_SHIFT; zone.wp = zone.start + zone.len; zone.type = BLK_ZONE_TYPE_CONVENTIONAL; zone.cond = BLK_ZONE_COND_NOT_WP; zone.capacity = zone.len; goto handle_io_error; } /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as * struct request allocations for the same device. The former case may * end up in a deadlock on the inode truncate mutex, while the latter * may prevent IO forward progress. Executing the report zones under * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, zonefs_io_error_cb, &zone); memalloc_noio_restore(noio_flag); if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); zonefs_warn(sb, "remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; return; } handle_io_error: zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; static struct inode *zonefs_alloc_inode(struct super_block *sb) { struct zonefs_inode_info *zi; zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL); if (!zi) return NULL; inode_init_once(&zi->i_vnode); mutex_init(&zi->i_truncate_mutex); zi->i_wr_refcnt = 0; return &zi->i_vnode; } static void zonefs_free_inode(struct inode *inode) { kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); } /* * File system stat. */ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype t; buf->f_type = ZONEFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_namelen = ZONEFS_NAME_MAX; spin_lock(&sbi->s_lock); buf->f_blocks = sbi->s_blocks; if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) buf->f_bfree = 0; else buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; buf->f_bavail = buf->f_bfree; for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { if (sbi->s_zgroup[t].g_nr_zones) buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; } buf->f_ffree = 0; spin_unlock(&sbi->s_lock); buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); return 0; } enum { Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, Opt_explicit_open, Opt_err, }; static const match_table_t tokens = { { Opt_errors_ro, "errors=remount-ro"}, { Opt_errors_zro, "errors=zone-ro"}, { Opt_errors_zol, "errors=zone-offline"}, { Opt_errors_repair, "errors=repair"}, { Opt_explicit_open, "explicit-open" }, { Opt_err, NULL} }; static int zonefs_parse_options(struct super_block *sb, char *options) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); substring_t args[MAX_OPT_ARGS]; char *p; if (!options) return 0; while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_errors_ro: sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; break; case Opt_errors_zro: sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; break; case Opt_errors_zol: sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; break; case Opt_errors_repair: sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; break; case Opt_explicit_open: sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN; break; default: return -EINVAL; } } return 0; } static int zonefs_show_options(struct seq_file *seq, struct dentry *root) { struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) seq_puts(seq, ",errors=remount-ro"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) seq_puts(seq, ",errors=zone-ro"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) seq_puts(seq, ",errors=zone-offline"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) seq_puts(seq, ",errors=repair"); return 0; } static int zonefs_remount(struct super_block *sb, int *flags, char *data) { sync_filesystem(sb); return zonefs_parse_options(sb, data); } static int zonefs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int ret; if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (ret) return ret; /* * Since files and directories cannot be created nor deleted, do not * allow setting any write attributes on the sub-directories grouping * files by zone type. */ if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && (iattr->ia_mode & 0222)) return -EPERM; if (((iattr->ia_valid & ATTR_UID) && !uid_eq(iattr->ia_uid, inode->i_uid)) || ((iattr->ia_valid & ATTR_GID) && !gid_eq(iattr->ia_gid, inode->i_gid))) { ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (ret) return ret; } if (iattr->ia_valid & ATTR_SIZE) { ret = zonefs_file_truncate(inode, iattr->ia_size); if (ret) return ret; } setattr_copy(&nop_mnt_idmap, inode, iattr); if (S_ISREG(inode->i_mode)) { struct zonefs_zone *z = zonefs_inode_zone(inode); z->z_mode = inode->i_mode; z->z_uid = inode->i_uid; z->z_gid = inode->i_gid; } return 0; } static const struct inode_operations zonefs_file_inode_operations = { .setattr = zonefs_inode_setattr, }; static long zonefs_fname_to_fno(const struct qstr *fname) { const char *name = fname->name; unsigned int len = fname->len; long fno = 0, shift = 1; const char *rname; char c = *name; unsigned int i; /* * File names are always a base-10 number string without any * leading 0s. */ if (!isdigit(c)) return -ENOENT; if (len > 1 && c == '0') return -ENOENT; if (len == 1) return c - '0'; for (i = 0, rname = name + len - 1; i < len; i++, rname--) { c = *rname; if (!isdigit(c)) return -ENOENT; fno += (c - '0') * shift; shift *= 10; } return fno; } static struct inode *zonefs_get_file_inode(struct inode *dir, struct dentry *dentry) { struct zonefs_zone_group *zgroup = dir->i_private; struct super_block *sb = dir->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone *z; struct inode *inode; ino_t ino; long fno; /* Get the file number from the file name */ fno = zonefs_fname_to_fno(&dentry->d_name); if (fno < 0) return ERR_PTR(fno); if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones) return ERR_PTR(-ENOENT); z = &zgroup->g_zones[fno]; ino = z->z_sector >> sbi->s_zone_sectors_shift; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { WARN_ON_ONCE(inode->i_private != z); return inode; } inode->i_ino = ino; inode->i_mode = z->z_mode; inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(dir)))); inode->i_uid = z->z_uid; inode->i_gid = z->z_gid; inode->i_size = z->z_wpoffset; inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; inode->i_private = z; inode->i_op = &zonefs_file_inode_operations; inode->i_fop = &zonefs_file_operations; inode->i_mapping->a_ops = &zonefs_file_aops; /* Update the inode access rights depending on the zone condition */ zonefs_inode_update_mode(inode); unlock_new_inode(inode); return inode; } static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, enum zonefs_ztype ztype) { struct inode *root = d_inode(sb->s_root); struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct inode *inode; ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; inode->i_ino = ino; inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(root)))); inode->i_private = &sbi->s_zgroup[ztype]; set_nlink(inode, 2); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &zonefs_dir_operations; unlock_new_inode(inode); return inode; } static struct inode *zonefs_get_dir_inode(struct inode *dir, struct dentry *dentry) { struct super_block *sb = dir->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); const char *name = dentry->d_name.name; enum zonefs_ztype ztype; /* * We only need to check for the "seq" directory and * the "cnv" directory if we have conventional zones. */ if (dentry->d_name.len != 3) return ERR_PTR(-ENOENT); for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_nr_zones && memcmp(name, zonefs_zgroup_name(ztype), 3) == 0) break; } if (ztype == ZONEFS_ZTYPE_MAX) return ERR_PTR(-ENOENT); return zonefs_get_zgroup_inode(sb, ztype); } static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; if (dentry->d_name.len > ZONEFS_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (dir == d_inode(dir->i_sb->s_root)) inode = zonefs_get_dir_inode(dir, dentry); else inode = zonefs_get_file_inode(dir, dentry); return d_splice_alias(inode, dentry); } static int zonefs_readdir_root(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV; ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1; if (ctx->pos >= inode->i_size) return 0; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) { if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ztype = ZONEFS_ZTYPE_SEQ; if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, base_ino + ztype, DT_DIR)) return 0; ctx->pos++; } if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) { ztype = ZONEFS_ZTYPE_SEQ; if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, base_ino + ztype, DT_DIR)) return 0; ctx->pos++; } return 0; } static int zonefs_readdir_zgroup(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct zonefs_zone_group *zgroup = inode->i_private; struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone *z; int fname_len; char *fname; ino_t ino; int f; /* * The size of zone group directories is equal to the number * of zone files in the group and does note include the "." and * ".." entries. Hence the "+ 2" here. */ if (ctx->pos >= inode->i_size + 2) return 0; if (!dir_emit_dots(file, ctx)) return 0; fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); if (!fname) return -ENOMEM; for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) { z = &zgroup->g_zones[f]; ino = z->z_sector >> sbi->s_zone_sectors_shift; fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f); if (!dir_emit(ctx, fname, fname_len, ino, DT_REG)) break; ctx->pos++; } kfree(fname); return 0; } static int zonefs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); if (inode == d_inode(inode->i_sb->s_root)) return zonefs_readdir_root(file, ctx); return zonefs_readdir_zgroup(file, ctx); } const struct inode_operations zonefs_dir_inode_operations = { .lookup = zonefs_lookup, .setattr = zonefs_inode_setattr, }; const struct file_operations zonefs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = zonefs_readdir, }; struct zonefs_zone_data { struct super_block *sb; unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; sector_t cnv_zone_start; struct blk_zone *zones; }; static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct zonefs_zone_data *zd = data; struct super_block *sb = zd->sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); /* * We do not care about the first zone: it contains the super block * and not exposed as a file. */ if (!idx) return 0; /* * Count the number of zones that will be exposed as files. * For sequential zones, we always have as many files as zones. * FOr conventional zones, the number of files depends on if we have * conventional zones aggregation enabled. */ switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: if (sbi->s_features & ZONEFS_F_AGGRCNV) { /* One file per set of contiguous conventional zones */ if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || zone->start != zd->cnv_zone_start) sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; zd->cnv_zone_start = zone->start + zone->len; } else { /* One file per zone */ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; } break; case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; break; default: zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", zone->type); return -EIO; } memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); return 0; } static int zonefs_get_zone_info(struct zonefs_zone_data *zd) { struct block_device *bdev = zd->sb->s_bdev; int ret; zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), GFP_KERNEL); if (!zd->zones) return -ENOMEM; /* Get zones information from the device */ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, zonefs_get_zone_info_cb, zd); if (ret < 0) { zonefs_err(zd->sb, "Zone report failed %d\n", ret); return ret; } if (ret != bdev_nr_zones(bdev)) { zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", ret, bdev_nr_zones(bdev)); return -EIO; } return 0; } static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) { kvfree(zd->zones); } /* * Create a zone group and populate it with zone files. */ static int zonefs_init_zgroup(struct super_block *sb, struct zonefs_zone_data *zd, enum zonefs_ztype ztype) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; struct blk_zone *zone, *next, *end; struct zonefs_zone *z; unsigned int n = 0; int ret; /* Allocate the zone group. If it is empty, we have nothing to do. */ if (!zgroup->g_nr_zones) return 0; zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, sizeof(struct zonefs_zone), GFP_KERNEL); if (!zgroup->g_zones) return -ENOMEM; /* * Initialize the zone groups using the device zone information. * We always skip the first zone as it contains the super block * and is not use to back a file. */ end = zd->zones + bdev_nr_zones(sb->s_bdev); for (zone = &zd->zones[1]; zone < end; zone = next) { next = zone + 1; if (zonefs_zone_type(zone) != ztype) continue; if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) return -EINVAL; /* * For conventional zones, contiguous zones can be aggregated * together to form larger files. Note that this overwrites the * length of the first zone of the set of contiguous zones * aggregated together. If one offline or read-only zone is * found, assume that all zones aggregated have the same * condition. */ if (ztype == ZONEFS_ZTYPE_CNV && (sbi->s_features & ZONEFS_F_AGGRCNV)) { for (; next < end; next++) { if (zonefs_zone_type(next) != ztype) break; zone->len += next->len; zone->capacity += next->capacity; if (next->cond == BLK_ZONE_COND_READONLY && zone->cond != BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_READONLY; else if (next->cond == BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_OFFLINE; } } z = &zgroup->g_zones[n]; if (ztype == ZONEFS_ZTYPE_CNV) z->z_flags |= ZONEFS_ZONE_CNV; z->z_sector = zone->start; z->z_size = zone->len << SECTOR_SHIFT; if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && !(sbi->s_features & ZONEFS_F_AGGRCNV)) { zonefs_err(sb, "Invalid zone size %llu (device zone sectors %llu)\n", z->z_size, bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); return -EINVAL; } z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); z->z_mode = S_IFREG | sbi->s_perm; z->z_uid = sbi->s_uid; z->z_gid = sbi->s_gid; /* * Let zonefs_inode_update_mode() know that we will need * special initialization of the inode mode the first time * it is accessed. */ z->z_flags |= ZONEFS_ZONE_INIT_MODE; sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; /* * For sequential zones, make sure that any open zone is closed * first to ensure that the initial number of open zones is 0, * in sync with the open zone accounting done when the mount * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. */ if (ztype == ZONEFS_ZTYPE_SEQ && (zone->cond == BLK_ZONE_COND_IMP_OPEN || zone->cond == BLK_ZONE_COND_EXP_OPEN)) { ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); if (ret) return ret; } zonefs_account_active(sb, z); n++; } if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) return -EINVAL; zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", zonefs_zgroup_name(ztype), zgroup->g_nr_zones, zgroup->g_nr_zones > 1 ? "s" : ""); return 0; } static void zonefs_free_zgroups(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype; if (!sbi) return; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { kvfree(sbi->s_zgroup[ztype].g_zones); sbi->s_zgroup[ztype].g_zones = NULL; } } /* * Create a zone group and populate it with zone files. */ static int zonefs_init_zgroups(struct super_block *sb) { struct zonefs_zone_data zd; enum zonefs_ztype ztype; int ret; /* First get the device zone information */ memset(&zd, 0, sizeof(struct zonefs_zone_data)); zd.sb = sb; ret = zonefs_get_zone_info(&zd); if (ret) goto cleanup; /* Allocate and initialize the zone groups */ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ret = zonefs_init_zgroup(sb, &zd, ztype); if (ret) { zonefs_info(sb, "Zone group \"%s\" initialization failed\n", zonefs_zgroup_name(ztype)); break; } } cleanup: zonefs_free_zone_info(&zd); if (ret) zonefs_free_zgroups(sb); return ret; } /* * Read super block information from the device. */ static int zonefs_read_super(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_super *super; u32 crc, stored_crc; struct page *page; struct bio_vec bio_vec; struct bio bio; int ret; page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = 0; __bio_add_page(&bio, page, PAGE_SIZE, 0); ret = submit_bio_wait(&bio); if (ret) goto free_page; super = page_address(page); ret = -EINVAL; if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) goto free_page; stored_crc = le32_to_cpu(super->s_crc); super->s_crc = 0; crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); if (crc != stored_crc) { zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", crc, stored_crc); goto free_page; } sbi->s_features = le64_to_cpu(super->s_features); if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { zonefs_err(sb, "Unknown features set 0x%llx\n", sbi->s_features); goto free_page; } if (sbi->s_features & ZONEFS_F_UID) { sbi->s_uid = make_kuid(current_user_ns(), le32_to_cpu(super->s_uid)); if (!uid_valid(sbi->s_uid)) { zonefs_err(sb, "Invalid UID feature\n"); goto free_page; } } if (sbi->s_features & ZONEFS_F_GID) { sbi->s_gid = make_kgid(current_user_ns(), le32_to_cpu(super->s_gid)); if (!gid_valid(sbi->s_gid)) { zonefs_err(sb, "Invalid GID feature\n"); goto free_page; } } if (sbi->s_features & ZONEFS_F_PERM) sbi->s_perm = le32_to_cpu(super->s_perm); if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { zonefs_err(sb, "Reserved area is being used\n"); goto free_page; } import_uuid(&sbi->s_uuid, super->s_uuid); ret = 0; free_page: __free_page(page); return ret; } static const struct super_operations zonefs_sops = { .alloc_inode = zonefs_alloc_inode, .free_inode = zonefs_free_inode, .statfs = zonefs_statfs, .remount_fs = zonefs_remount, .show_options = zonefs_show_options, }; static int zonefs_get_zgroup_inodes(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct inode *dir_inode; enum zonefs_ztype ztype; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (!sbi->s_zgroup[ztype].g_nr_zones) continue; dir_inode = zonefs_get_zgroup_inode(sb, ztype); if (IS_ERR(dir_inode)) return PTR_ERR(dir_inode); sbi->s_zgroup[ztype].g_inode = dir_inode; } return 0; } static void zonefs_release_zgroup_inodes(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype; if (!sbi) return; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_inode) { iput(sbi->s_zgroup[ztype].g_inode); sbi->s_zgroup[ztype].g_inode = NULL; } } } /* * Check that the device is zoned. If it is, get the list of zones and create * sub-directories and files according to the device zone configuration and * format options. */ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) { struct zonefs_sb_info *sbi; struct inode *inode; enum zonefs_ztype ztype; int ret; if (!bdev_is_zoned(sb->s_bdev)) { zonefs_err(sb, "Not a zoned block device\n"); return -EINVAL; } /* * Initialize super block information: the maximum file size is updated * when the zone files are created so that the format option * ZONEFS_F_AGGRCNV which increases the maximum file size of a file * beyond the zone size is taken into account. */ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; spin_lock_init(&sbi->s_lock); sb->s_fs_info = sbi; sb->s_magic = ZONEFS_MAGIC; sb->s_maxbytes = 0; sb->s_op = &zonefs_sops; sb->s_time_gran = 1; /* * The block size is set to the device zone write granularity to ensure * that write operations are always aligned according to the device * interface constraints. */ sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev)); sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); sbi->s_uid = GLOBAL_ROOT_UID; sbi->s_gid = GLOBAL_ROOT_GID; sbi->s_perm = 0640; sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; atomic_set(&sbi->s_wro_seq_files, 0); sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); atomic_set(&sbi->s_active_seq_files, 0); sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); ret = zonefs_read_super(sb); if (ret) return ret; ret = zonefs_parse_options(sb, data); if (ret) return ret; zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); if (!sbi->s_max_wro_seq_files && !sbi->s_max_active_seq_files && sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { zonefs_info(sb, "No open and active zone limits. Ignoring explicit_open mount option\n"); sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; } /* Initialize the zone groups */ ret = zonefs_init_zgroups(sb); if (ret) goto cleanup; /* Create the root directory inode */ ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto cleanup; inode->i_ino = bdev_nr_zones(sb->s_bdev); inode->i_mode = S_IFDIR | 0555; simple_inode_init_ts(inode); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &zonefs_dir_operations; inode->i_size = 2; set_nlink(inode, 2); for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_nr_zones) { inc_nlink(inode); inode->i_size++; } } sb->s_root = d_make_root(inode); if (!sb->s_root) goto cleanup; /* * Take a reference on the zone groups directory inodes * to keep them in the inode cache. */ ret = zonefs_get_zgroup_inodes(sb); if (ret) goto cleanup; ret = zonefs_sysfs_register(sb); if (ret) goto cleanup; return 0; cleanup: zonefs_release_zgroup_inodes(sb); zonefs_free_zgroups(sb); return ret; } static struct dentry *zonefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); } static void zonefs_kill_super(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); /* Release the reference on the zone group directory inodes */ zonefs_release_zgroup_inodes(sb); kill_block_super(sb); zonefs_sysfs_unregister(sb); zonefs_free_zgroups(sb); kfree(sbi); } /* * File system definition and registration. */ static struct file_system_type zonefs_type = { .owner = THIS_MODULE, .name = "zonefs", .mount = zonefs_mount, .kill_sb = zonefs_kill_super, .fs_flags = FS_REQUIRES_DEV, }; static int __init zonefs_init_inodecache(void) { zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", sizeof(struct zonefs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), NULL); if (zonefs_inode_cachep == NULL) return -ENOMEM; return 0; } static void zonefs_destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy the inode cache. */ rcu_barrier(); kmem_cache_destroy(zonefs_inode_cachep); } static int __init zonefs_init(void) { int ret; BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); ret = zonefs_init_inodecache(); if (ret) return ret; ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; ret = register_filesystem(&zonefs_type); if (ret) goto sysfs_exit; return 0; sysfs_exit: zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); return ret; } static void __exit zonefs_exit(void) { unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); } MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS("zonefs"); module_init(zonefs_init); module_exit(zonefs_exit); |
7965 2419 2 2417 10478 8 6 4 3616 3615 8103 8101 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/lockref.h> #if USE_CMPXCHG_LOCKREF /* * Note that the "cmpxchg()" reloads the "old" value for the * failure case. */ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ int retry = 100; \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old; \ CODE \ if (likely(try_cmpxchg64_relaxed(&lockref->lock_count, \ &old.lock_count, \ new.lock_count))) { \ SUCCESS; \ } \ if (!--retry) \ break; \ } \ } while (0) #else #define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) #endif /** * lockref_get - Increments reference count unconditionally * @lockref: pointer to lockref structure * * This operation is only valid if you already hold a reference * to the object, so you know the count cannot be zero. */ void lockref_get(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; , return; ); spin_lock(&lockref->lock); lockref->count++; spin_unlock(&lockref->lock); } EXPORT_SYMBOL(lockref_get); /** * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ int lockref_get_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count <= 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_zero); /** * lockref_put_not_zero - Decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count would become zero */ int lockref_put_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count--; if (old.count <= 1) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 1) { lockref->count--; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_put_not_zero); /** * lockref_put_return - Decrement reference count if possible * @lockref: pointer to lockref structure * * Decrement the reference count and return the new value. * If the lockref was dead or locked, return an error. */ int lockref_put_return(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 0) return -1; , return new.count; ); return -1; } EXPORT_SYMBOL(lockref_put_return); /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ int lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 1) break; , return 1; ); spin_lock(&lockref->lock); if (lockref->count <= 1) return 0; lockref->count--; spin_unlock(&lockref->lock); return 1; } EXPORT_SYMBOL(lockref_put_or_lock); /** * lockref_mark_dead - mark lockref dead * @lockref: pointer to lockref structure */ void lockref_mark_dead(struct lockref *lockref) { assert_spin_locked(&lockref->lock); lockref->count = -128; } EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if lockref was dead */ int lockref_get_not_dead(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count < 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count >= 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_dead); |
23 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * i2c-core.h - interfaces internal to the I2C framework */ #include <linux/kconfig.h> #include <linux/rwsem.h> struct i2c_devinfo { struct list_head list; int busnum; struct i2c_board_info board_info; }; /* board_lock protects board_list and first_dynamic_bus_num. * only i2c core components are allowed to use these symbols. */ extern struct rw_semaphore __i2c_board_lock; extern struct list_head __i2c_board_list; extern int __i2c_first_dynamic_bus_num; int i2c_check_7bit_addr_validity_strict(unsigned short addr); int i2c_dev_irq_from_resources(const struct resource *resources, unsigned int num_resources); /* * We only allow atomic transfers for very late communication, e.g. to access a * PMIC when powering down. Atomic transfers are a corner case and not for * generic use! */ static inline bool i2c_in_atomic_xfer_mode(void) { return system_state > SYSTEM_RUNNING && (IS_ENABLED(CONFIG_PREEMPT_COUNT) ? !preemptible() : irqs_disabled()); } static inline int __i2c_lock_bus_helper(struct i2c_adapter *adap) { int ret = 0; if (i2c_in_atomic_xfer_mode()) { WARN(!adap->algo->master_xfer_atomic && !adap->algo->smbus_xfer_atomic, "No atomic I2C transfer handler for '%s'\n", dev_name(&adap->dev)); ret = i2c_trylock_bus(adap, I2C_LOCK_SEGMENT) ? 0 : -EAGAIN; } else { i2c_lock_bus(adap, I2C_LOCK_SEGMENT); } return ret; } static inline int __i2c_check_suspended(struct i2c_adapter *adap) { if (test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags)) { if (!test_and_set_bit(I2C_ALF_SUSPEND_REPORTED, &adap->locked_flags)) dev_WARN(&adap->dev, "Transfer while suspended\n"); return -ESHUTDOWN; } return 0; } #ifdef CONFIG_ACPI void i2c_acpi_register_devices(struct i2c_adapter *adap); int i2c_acpi_get_irq(struct i2c_client *client, bool *wake_capable); #else /* CONFIG_ACPI */ static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { } static inline int i2c_acpi_get_irq(struct i2c_client *client, bool *wake_capable) { return 0; } #endif /* CONFIG_ACPI */ extern struct notifier_block i2c_acpi_notifier; #ifdef CONFIG_ACPI_I2C_OPREGION int i2c_acpi_install_space_handler(struct i2c_adapter *adapter); void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter); #else /* CONFIG_ACPI_I2C_OPREGION */ static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter) { return 0; } static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { } #endif /* CONFIG_ACPI_I2C_OPREGION */ #ifdef CONFIG_OF void of_i2c_register_devices(struct i2c_adapter *adap); #else static inline void of_i2c_register_devices(struct i2c_adapter *adap) { } #endif extern struct notifier_block i2c_of_notifier; #if IS_ENABLED(CONFIG_I2C_SMBUS) int i2c_setup_smbus_alert(struct i2c_adapter *adap); #else static inline int i2c_setup_smbus_alert(struct i2c_adapter *adap) { return 0; } #endif |
1 1 2 2 1 1 1 2 2 2 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 | // SPDX-License-Identifier: GPL-2.0-or-later /* * super.c * * load/unload driver, mount/dismount volumes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/random.h> #include <linux/statfs.h> #include <linux/moduleparam.h> #include <linux/blkdev.h> #include <linux/socket.h> #include <linux/inet.h> #include <linux/parser.h> #include <linux/crc32.h> #include <linux/debugfs.h> #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> #include <linux/signal.h> #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" #include <cluster/masklog.h> #include "ocfs2.h" /* this should be the only file to include a version 1 header */ #include "ocfs1_fs_compat.h" #include "alloc.h" #include "aops.h" #include "blockcheck.h" #include "dlmglue.h" #include "export.h" #include "extent_map.h" #include "heartbeat.h" #include "inode.h" #include "journal.h" #include "localalloc.h" #include "namei.h" #include "slot_map.h" #include "super.h" #include "sysfile.h" #include "uptodate.h" #include "xattr.h" #include "quota.h" #include "refcounttree.h" #include "suballoc.h" #include "buffer_head_io.h" #include "filecheck.h" static struct kmem_cache *ocfs2_inode_cachep; struct kmem_cache *ocfs2_dquot_cachep; struct kmem_cache *ocfs2_qf_chunk_cachep; static struct dentry *ocfs2_debugfs_root; MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OCFS2 cluster file system"); struct mount_options { unsigned long commit_interval; unsigned long mount_opt; unsigned int atime_quantum; unsigned short slot; int localalloc_opt; unsigned int resv_level; int dir_resv_level; char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; }; static int ocfs2_parse_options(struct super_block *sb, char *options, struct mount_options *mopt, int is_remount); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options); static int ocfs2_show_options(struct seq_file *s, struct dentry *root); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); static int ocfs2_remount(struct super_block *sb, int *flags, char *data); static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); static int ocfs2_initialize_mem_caches(void); static void ocfs2_free_mem_caches(void); static void ocfs2_delete_osb(struct ocfs2_super *osb); static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf); static int ocfs2_sync_fs(struct super_block *sb, int wait); static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); static void ocfs2_release_system_inodes(struct ocfs2_super *osb); static int ocfs2_check_volume(struct ocfs2_super *osb); static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, u32 sectsize, struct ocfs2_blockcheck_stats *stats); static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, int sector_size, struct ocfs2_blockcheck_stats *stats); static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size); static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_free_inode(struct inode *inode); static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); static int ocfs2_enable_quotas(struct ocfs2_super *osb); static void ocfs2_disable_quotas(struct ocfs2_super *osb); static struct dquot **ocfs2_get_dquots(struct inode *inode) { return OCFS2_I(inode)->i_dquot; } static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, .alloc_inode = ocfs2_alloc_inode, .free_inode = ocfs2_free_inode, .drop_inode = ocfs2_drop_inode, .evict_inode = ocfs2_evict_inode, .sync_fs = ocfs2_sync_fs, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, .quota_read = ocfs2_quota_read, .quota_write = ocfs2_quota_write, .get_dquots = ocfs2_get_dquots, }; enum { Opt_barrier, Opt_err_panic, Opt_err_ro, Opt_intr, Opt_nointr, Opt_hb_none, Opt_hb_local, Opt_hb_global, Opt_data_ordered, Opt_data_writeback, Opt_atime_quantum, Opt_slot, Opt_commit, Opt_localalloc, Opt_localflocks, Opt_stack, Opt_user_xattr, Opt_nouser_xattr, Opt_inode64, Opt_acl, Opt_noacl, Opt_usrquota, Opt_grpquota, Opt_coherency_buffered, Opt_coherency_full, Opt_resv_level, Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, Opt_err, }; static const match_table_t tokens = { {Opt_barrier, "barrier=%u"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, {Opt_intr, "intr"}, {Opt_nointr, "nointr"}, {Opt_hb_none, OCFS2_HB_NONE}, {Opt_hb_local, OCFS2_HB_LOCAL}, {Opt_hb_global, OCFS2_HB_GLOBAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, {Opt_slot, "preferred_slot=%u"}, {Opt_commit, "commit=%u"}, {Opt_localalloc, "localalloc=%d"}, {Opt_localflocks, "localflocks"}, {Opt_stack, "cluster_stack=%s"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_inode64, "inode64"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, {Opt_coherency_buffered, "coherency=buffered"}, {Opt_coherency_full, "coherency=full"}, {Opt_resv_level, "resv_level=%u"}, {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, {Opt_err, NULL} }; #ifdef CONFIG_DEBUG_FS static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) { struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; int i, out = 0; unsigned long flags; out += scnprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", "Device", osb->dev_str, osb->uuid_str, osb->fs_generation, osb->vol_label); out += scnprintf(buf + out, len - out, "%10s => State: %d Flags: 0x%lX\n", "Volume", atomic_read(&osb->vol_state), osb->osb_flags); out += scnprintf(buf + out, len - out, "%10s => Block: %lu Cluster: %d\n", "Sizes", osb->sb->s_blocksize, osb->s_clustersize); out += scnprintf(buf + out, len - out, "%10s => Compat: 0x%X Incompat: 0x%X " "ROcompat: 0x%X\n", "Features", osb->s_feature_compat, osb->s_feature_incompat, osb->s_feature_ro_compat); out += scnprintf(buf + out, len - out, "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", osb->s_mount_opt, osb->s_atime_quantum); if (cconn) { out += scnprintf(buf + out, len - out, "%10s => Stack: %s Name: %*s " "Version: %d.%d\n", "Cluster", (*osb->osb_cluster_stack == '\0' ? "o2cb" : osb->osb_cluster_stack), cconn->cc_namelen, cconn->cc_name, cconn->cc_version.pv_major, cconn->cc_version.pv_minor); } spin_lock_irqsave(&osb->dc_task_lock, flags); out += scnprintf(buf + out, len - out, "%10s => Pid: %d Count: %lu WakeSeq: %lu " "WorkSeq: %lu\n", "DownCnvt", (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), osb->blocked_lock_count, osb->dc_wake_sequence, osb->dc_work_sequence); spin_unlock_irqrestore(&osb->dc_task_lock, flags); spin_lock(&osb->osb_lock); out += scnprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", "Recovery", (osb->recovery_thread_task ? task_pid_nr(osb->recovery_thread_task) : -1)); if (rm->rm_used == 0) out += scnprintf(buf + out, len - out, " None\n"); else { for (i = 0; i < rm->rm_used; i++) out += scnprintf(buf + out, len - out, " %d", rm->rm_entries[i]); out += scnprintf(buf + out, len - out, "\n"); } spin_unlock(&osb->osb_lock); out += scnprintf(buf + out, len - out, "%10s => Pid: %d Interval: %lu\n", "Commit", (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), osb->osb_commit_interval); out += scnprintf(buf + out, len - out, "%10s => State: %d TxnId: %lu NumTxns: %d\n", "Journal", osb->journal->j_state, osb->journal->j_trans_id, atomic_read(&osb->journal->j_num_trans)); out += scnprintf(buf + out, len - out, "%10s => GlobalAllocs: %d LocalAllocs: %d " "SubAllocs: %d LAWinMoves: %d SAExtends: %d\n", "Stats", atomic_read(&osb->alloc_stats.bitmap_data), atomic_read(&osb->alloc_stats.local_data), atomic_read(&osb->alloc_stats.bg_allocs), atomic_read(&osb->alloc_stats.moves), atomic_read(&osb->alloc_stats.bg_extends)); out += scnprintf(buf + out, len - out, "%10s => State: %u Descriptor: %llu Size: %u bits " "Default: %u bits\n", "LocalAlloc", osb->local_alloc_state, (unsigned long long)osb->la_last_gd, osb->local_alloc_bits, osb->local_alloc_default_bits); spin_lock(&osb->osb_lock); out += scnprintf(buf + out, len - out, "%10s => InodeSlot: %d StolenInodes: %d, " "MetaSlot: %d StolenMeta: %d\n", "Steal", osb->s_inode_steal_slot, atomic_read(&osb->s_num_inodes_stolen), osb->s_meta_steal_slot, atomic_read(&osb->s_num_meta_stolen)); spin_unlock(&osb->osb_lock); out += scnprintf(buf + out, len - out, "OrphanScan => "); out += scnprintf(buf + out, len - out, "Local: %u Global: %u ", os->os_count, os->os_seqno); out += scnprintf(buf + out, len - out, " Last Scan: "); if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) out += scnprintf(buf + out, len - out, "Disabled\n"); else out += scnprintf(buf + out, len - out, "%lu seconds ago\n", (unsigned long)(ktime_get_seconds() - os->os_scantime)); out += scnprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); for (i = 0; i < osb->max_slots; ++i) { out += scnprintf(buf + out, len - out, "%10s %c %3d %10d\n", " ", (i == osb->slot_num ? '*' : ' '), i, osb->slot_recovery_generations[i]); } return out; } static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) { struct ocfs2_super *osb = inode->i_private; char *buf = NULL; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) goto bail; i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE)); file->private_data = buf; return 0; bail: return -ENOMEM; } static int ocfs2_debug_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, i_size_read(file->f_mapping->host)); } #else static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) { return 0; } static int ocfs2_debug_release(struct inode *inode, struct file *file) { return 0; } static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { return 0; } #endif /* CONFIG_DEBUG_FS */ static const struct file_operations ocfs2_osb_debug_fops = { .open = ocfs2_osb_debug_open, .release = ocfs2_debug_release, .read = ocfs2_debug_read, .llseek = generic_file_llseek, }; static int ocfs2_sync_fs(struct super_block *sb, int wait) { int status; tid_t target; struct ocfs2_super *osb = OCFS2_SB(sb); if (ocfs2_is_hard_readonly(osb)) return -EROFS; if (wait) { status = ocfs2_flush_truncate_log(osb); if (status < 0) mlog_errno(status); } else { ocfs2_schedule_truncate_log_flush(osb, 0); } if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) { if (wait) jbd2_log_wait_commit(osb->journal->j_journal, target); } return 0; } static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino) { if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA) && (ino == USER_QUOTA_SYSTEM_INODE || ino == LOCAL_USER_QUOTA_SYSTEM_INODE)) return 0; if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) && (ino == GROUP_QUOTA_SYSTEM_INODE || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE)) return 0; return 1; } static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) { struct inode *new = NULL; int status = 0; int i; new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); if (IS_ERR(new)) { status = PTR_ERR(new); mlog_errno(status); goto bail; } osb->root_inode = new; new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); if (IS_ERR(new)) { status = PTR_ERR(new); mlog_errno(status); goto bail; } osb->sys_root_inode = new; for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { if (!ocfs2_need_system_inode(osb, i)) continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; mlog_errno(status); mlog(ML_ERROR, "Unable to load system inode %d, " "possibly corrupt fs?", i); goto bail; } // the array now has one ref, so drop this one iput(new); } bail: if (status) mlog_errno(status); return status; } static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) { struct inode *new = NULL; int status = 0; int i; for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES; i++) { if (!ocfs2_need_system_inode(osb, i)) continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", status, i, osb->slot_num); goto bail; } /* the array now has one ref, so drop this one */ iput(new); } bail: if (status) mlog_errno(status); return status; } static void ocfs2_release_system_inodes(struct ocfs2_super *osb) { int i; struct inode *inode; for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { inode = osb->global_system_inodes[i]; if (inode) { iput(inode); osb->global_system_inodes[i] = NULL; } } inode = osb->sys_root_inode; if (inode) { iput(inode); osb->sys_root_inode = NULL; } inode = osb->root_inode; if (inode) { iput(inode); osb->root_inode = NULL; } if (!osb->local_system_inodes) return; for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { if (osb->local_system_inodes[i]) { iput(osb->local_system_inodes[i]); osb->local_system_inodes[i] = NULL; } } kfree(osb->local_system_inodes); osb->local_system_inodes = NULL; } /* We're allocating fs objects, use GFP_NOFS */ static struct inode *ocfs2_alloc_inode(struct super_block *sb) { struct ocfs2_inode_info *oi; oi = alloc_inode_sb(sb, ocfs2_inode_cachep, GFP_NOFS); if (!oi) return NULL; oi->i_sync_tid = 0; oi->i_datasync_tid = 0; memset(&oi->i_dquot, 0, sizeof(oi->i_dquot)); jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); return &oi->vfs_inode; } static void ocfs2_free_inode(struct inode *inode) { kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); } static unsigned long long ocfs2_max_file_offset(unsigned int bbits, unsigned int cbits) { unsigned int bytes = 1 << cbits; unsigned int trim = bytes; unsigned int bitshift = 32; /* * i_size and all block offsets in ocfs2 are always 64 bits * wide. i_clusters is 32 bits, in cluster-sized units. So on * 64 bit platforms, cluster size will be the limiting factor. */ #if BITS_PER_LONG == 32 BUILD_BUG_ON(sizeof(sector_t) != 8); /* * We might be limited by page cache size. */ if (bytes > PAGE_SIZE) { bytes = PAGE_SIZE; trim = 1; /* * Shift by 31 here so that we don't get larger than * MAX_LFS_FILESIZE */ bitshift = 31; } #endif /* * Trim by a whole cluster when we can actually approach the * on-disk limits. Otherwise we can overflow i_clusters when * an extent start is at the max offset. */ return (((unsigned long long)bytes) << bitshift) - trim; } static int ocfs2_remount(struct super_block *sb, int *flags, char *data) { int incompat_features; int ret = 0; struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); u32 tmp; sync_filesystem(sb); if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || !ocfs2_check_set_options(sb, &parsed_options)) { ret = -EINVAL; goto out; } tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; } if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change data mode on remount\n"); goto out; } /* Probably don't want this on remount; it might * mess with other nodes */ if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); goto out; } /* We're going to/from readonly mode. */ if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { /* Disable quota accounting before remounting RO */ if (*flags & SB_RDONLY) { ret = ocfs2_susp_quotas(osb, 0); if (ret < 0) goto out; } /* Lock here so the check of HARD_RO and the potential * setting of SOFT_RO is atomic. */ spin_lock(&osb->osb_lock); if (osb->osb_flags & OCFS2_OSB_HARD_RO) { mlog(ML_ERROR, "Remount on readonly device is forbidden.\n"); ret = -EROFS; goto unlock_osb; } if (*flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; } else { if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { mlog(ML_ERROR, "Cannot remount RDWR " "filesystem due to previous errors.\n"); ret = -EROFS; goto unlock_osb; } incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP); if (incompat_features) { mlog(ML_ERROR, "Cannot remount RDWR because " "of unsupported optional features " "(%x).\n", incompat_features); ret = -EINVAL; goto unlock_osb; } sb->s_flags &= ~SB_RDONLY; osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; } trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags); unlock_osb: spin_unlock(&osb->osb_lock); /* Enable quota accounting after remounting RW */ if (!ret && !(*flags & SB_RDONLY)) { if (sb_any_quota_suspended(sb)) ret = ocfs2_susp_quotas(osb, 1); else ret = ocfs2_enable_quotas(osb); if (ret < 0) { /* Return back changes... */ spin_lock(&osb->osb_lock); sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; spin_unlock(&osb->osb_lock); goto out; } } } if (!ret) { /* Only save off the new mount options in case of a successful * remount. */ osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; if (parsed_options.commit_interval) osb->osb_commit_interval = parsed_options.commit_interval; if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); } out: return ret; } static int ocfs2_sb_probe(struct super_block *sb, struct buffer_head **bh, int *sector_size, struct ocfs2_blockcheck_stats *stats) { int status, tmpstat; struct ocfs1_vol_disk_hdr *hdr; struct ocfs2_dinode *di; int blksize; *bh = NULL; /* may be > 512 */ *sector_size = bdev_logical_block_size(sb->s_bdev); if (*sector_size > OCFS2_MAX_BLOCKSIZE) { mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", *sector_size, OCFS2_MAX_BLOCKSIZE); status = -EINVAL; goto bail; } /* Can this really happen? */ if (*sector_size < OCFS2_MIN_BLOCKSIZE) *sector_size = OCFS2_MIN_BLOCKSIZE; /* check block zero for old format */ status = ocfs2_get_sector(sb, bh, 0, *sector_size); if (status < 0) { mlog_errno(status); goto bail; } hdr = (struct ocfs1_vol_disk_hdr *) (*bh)->b_data; if (hdr->major_version == OCFS1_MAJOR_VERSION) { mlog(ML_ERROR, "incompatible version: %u.%u\n", hdr->major_version, hdr->minor_version); status = -EINVAL; } if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE, strlen(OCFS1_VOLUME_SIGNATURE)) == 0) { mlog(ML_ERROR, "incompatible volume signature: %8s\n", hdr->signature); status = -EINVAL; } brelse(*bh); *bh = NULL; if (status < 0) { mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be " "upgraded before mounting with ocfs v2\n"); goto bail; } /* * Now check at magic offset for 512, 1024, 2048, 4096 * blocksizes. 4096 is the maximum blocksize because it is * the minimum clustersize. */ status = -EINVAL; for (blksize = *sector_size; blksize <= OCFS2_MAX_BLOCKSIZE; blksize <<= 1) { tmpstat = ocfs2_get_sector(sb, bh, OCFS2_SUPER_BLOCK_BLKNO, blksize); if (tmpstat < 0) { status = tmpstat; mlog_errno(status); break; } di = (struct ocfs2_dinode *) (*bh)->b_data; memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); spin_lock_init(&stats->b_lock); tmpstat = ocfs2_verify_volume(di, *bh, blksize, stats); if (tmpstat < 0) { brelse(*bh); *bh = NULL; } if (tmpstat != -EAGAIN) { status = tmpstat; break; } } bail: return status; } static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) { u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; if (osb->s_mount_opt & hb_enabled) { if (ocfs2_mount_local(osb)) { mlog(ML_ERROR, "Cannot heartbeat on a locally " "mounted device.\n"); return -EINVAL; } if (ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Userspace stack expected, but " "o2cb heartbeat arguments passed to mount\n"); return -EINVAL; } if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && !ocfs2_cluster_o2cb_global_heartbeat(osb)) || ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && ocfs2_cluster_o2cb_global_heartbeat(osb))) { mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); return -EINVAL; } } if (!(osb->s_mount_opt & hb_enabled)) { if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && !ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Heartbeat has to be started to mount " "a read-write clustered device.\n"); return -EINVAL; } } return 0; } /* * If we're using a userspace stack, mount should have passed * a name that matches the disk. If not, mount should not * have passed a stack. */ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, struct mount_options *mopt) { if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { mlog(ML_ERROR, "cluster stack passed to mount, but this filesystem " "does not support it\n"); return -EINVAL; } if (ocfs2_userspace_stack(osb) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, "cluster stack passed to mount (\"%s\") does not " "match the filesystem (\"%s\")\n", mopt->cluster_stack, osb->osb_cluster_stack); return -EINVAL; } return 0; } static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) { int type; struct super_block *sb = osb->sb; unsigned int feature[OCFS2_MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; int status = 0; for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) continue; if (unsuspend) status = dquot_resume(sb, type); else { struct ocfs2_mem_dqinfo *oinfo; /* Cancel periodic syncing before suspending */ oinfo = sb_dqinfo(sb, type)->dqi_priv; cancel_delayed_work_sync(&oinfo->dqi_sync_work); status = dquot_suspend(sb, type); } if (status < 0) break; } if (status < 0) mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on " "remount (error = %d).\n", status); return status; } static int ocfs2_enable_quotas(struct ocfs2_super *osb) { struct inode *inode[OCFS2_MAXQUOTAS] = { NULL, NULL }; struct super_block *sb = osb->sb; unsigned int feature[OCFS2_MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; int status; int type; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) continue; inode[type] = ocfs2_get_system_file_inode(osb, ino[type], osb->slot_num); if (!inode[type]) { status = -ENOENT; goto out_quota_off; } status = dquot_load_quota_inode(inode[type], type, QFMT_OCFS2, DQUOT_USAGE_ENABLED); if (status < 0) goto out_quota_off; } for (type = 0; type < OCFS2_MAXQUOTAS; type++) iput(inode[type]); return 0; out_quota_off: ocfs2_disable_quotas(osb); for (type = 0; type < OCFS2_MAXQUOTAS; type++) iput(inode[type]); mlog_errno(status); return status; } static void ocfs2_disable_quotas(struct ocfs2_super *osb) { int type; struct inode *inode; struct super_block *sb = osb->sb; struct ocfs2_mem_dqinfo *oinfo; /* We mostly ignore errors in this function because there's not much * we can do when we see them */ for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; if (!sb_has_quota_suspended(sb, type)) { oinfo = sb_dqinfo(sb, type)->dqi_priv; cancel_delayed_work_sync(&oinfo->dqi_sync_work); } inode = igrab(sb->s_dquot.files[type]); /* Turn off quotas. This will remove all dquot structures from * memory and so they will be automatically synced to global * quota files */ dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); iput(inode); } } static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; int status, sector_size; struct mount_options parsed_options; struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; char nodestr[12]; struct ocfs2_blockcheck_stats stats; trace_ocfs2_fill_super(sb, data, silent); if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { status = -EINVAL; goto out; } /* probe for superblock */ status = ocfs2_sb_probe(sb, &bh, §or_size, &stats); if (status < 0) { mlog(ML_ERROR, "superblock probe failed!\n"); goto out; } status = ocfs2_initialize_super(sb, bh, sector_size, &stats); brelse(bh); bh = NULL; if (status < 0) goto out; osb = OCFS2_SB(sb); if (!ocfs2_check_set_options(sb, &parsed_options)) { status = -EINVAL; goto out_super; } osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; osb->osb_commit_interval = parsed_options.commit_interval; ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt); osb->osb_resv_level = parsed_options.resv_level; osb->osb_dir_resv_level = parsed_options.resv_level; if (parsed_options.dir_resv_level == -1) osb->osb_dir_resv_level = parsed_options.resv_level; else osb->osb_dir_resv_level = parsed_options.dir_resv_level; status = ocfs2_verify_userspace_stack(osb, &parsed_options); if (status) goto out_super; sb->s_magic = OCFS2_SUPER_MAGIC; sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); /* Hard readonly mode only if: bdev_read_only, SB_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { if (!sb_rdonly(sb)) { status = -EACCES; mlog(ML_ERROR, "Readonly device detected but readonly " "mount was not specified.\n"); goto out_super; } /* You should not be able to start a local heartbeat * on a readonly device. */ if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { status = -EROFS; mlog(ML_ERROR, "Local heartbeat specified on readonly " "device.\n"); goto out_super; } status = ocfs2_check_journals_nolocks(osb); if (status < 0) { if (status == -EROFS) mlog(ML_ERROR, "Recovery required on readonly " "file system, but write access is " "unavailable.\n"); goto out_super; } ocfs2_set_ro_flag(osb, 1); printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. " "Cluster services will not be used for this mount. " "Recovery will be skipped.\n", osb->dev_str); } if (!ocfs2_is_hard_readonly(osb)) { if (sb_rdonly(sb)) ocfs2_set_ro_flag(osb, 0); } status = ocfs2_verify_heartbeat(osb); if (status < 0) goto out_super; osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); if (ocfs2_meta_ecc(osb)) ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, osb->osb_debug_root); status = ocfs2_mount_volume(sb); if (status < 0) goto out_debugfs; if (osb->root_inode) inode = igrab(osb->root_inode); if (!inode) { status = -EIO; goto out_dismount; } osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); if (!osb->osb_dev_kset) { status = -ENOMEM; mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id); goto out_dismount; } /* Create filecheck sysfs related directories/files at * /sys/fs/ocfs2/<devname>/filecheck */ if (ocfs2_filecheck_create_sysfs(osb)) { status = -ENOMEM; mlog(ML_ERROR, "Unable to create filecheck sysfs directory at " "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id); goto out_dismount; } root = d_make_root(inode); if (!root) { status = -ENOMEM; goto out_dismount; } sb->s_root = root; ocfs2_complete_mount_recovery(osb); if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " "with %s data mode.\n", osb->dev_str, nodestr, osb->slot_num, osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); /* Now we can initialize quotas because we can afford to wait * for cluster locks recovery now. That also means that truncation * log recovery can happen but that waits for proper quota setup */ if (!sb_rdonly(sb)) { status = ocfs2_enable_quotas(osb); if (status < 0) { /* We have to err-out specially here because * s_root is already set */ mlog_errno(status); atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); return status; } } ocfs2_complete_quota_recovery(osb); /* Now we wake up again for processes waiting for quotas */ atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); wake_up(&osb->osb_mount_event); /* Start this when the mount is almost sure of being successful */ ocfs2_orphan_scan_start(osb); return status; out_dismount: atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); ocfs2_free_replay_slots(osb); ocfs2_dismount_volume(sb, 1); goto out; out_debugfs: debugfs_remove_recursive(osb->osb_debug_root); out_super: ocfs2_release_system_inodes(osb); kfree(osb->recovery_map); ocfs2_delete_osb(osb); kfree(osb); out: mlog_errno(status); return status; } static struct dentry *ocfs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", .mount = ocfs2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; MODULE_ALIAS_FS("ocfs2"); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options) { if (options->mount_opt & OCFS2_MOUNT_USRQUOTA && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { mlog(ML_ERROR, "User quotas were requested, but this " "filesystem does not have the feature enabled.\n"); return 0; } if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { mlog(ML_ERROR, "Group quotas were requested, but this " "filesystem does not have the feature enabled.\n"); return 0; } if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL && !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) { mlog(ML_ERROR, "ACL support requested but extended attributes " "feature is not enabled\n"); return 0; } /* No ACL setting specified? Use XATTR feature... */ if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL | OCFS2_MOUNT_NO_POSIX_ACL))) { if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) options->mount_opt |= OCFS2_MOUNT_POSIX_ACL; else options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; } return 1; } static int ocfs2_parse_options(struct super_block *sb, char *options, struct mount_options *mopt, int is_remount) { int status, user_stack = 0; char *p; u32 tmp; int token, option; substring_t args[MAX_OPT_ARGS]; trace_ocfs2_parse_options(is_remount, options ? options : "(none)"); mopt->commit_interval = 0; mopt->mount_opt = OCFS2_MOUNT_NOINTR; mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; mopt->slot = OCFS2_INVALID_SLOT; mopt->localalloc_opt = -1; mopt->cluster_stack[0] = '\0'; mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; mopt->dir_resv_level = -1; if (!options) { status = 1; goto bail; } while ((p = strsep(&options, ",")) != NULL) { if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_hb_local: mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; break; case Opt_hb_none: mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; break; case Opt_hb_global: mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; break; case Opt_barrier: if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option) mopt->mount_opt |= OCFS2_MOUNT_BARRIER; else mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; break; case Opt_intr: mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; break; case Opt_nointr: mopt->mount_opt |= OCFS2_MOUNT_NOINTR; break; case Opt_err_panic: mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT; mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS; mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; break; case Opt_err_ro: mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT; mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS; break; case Opt_err_cont: mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS; mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT; break; case Opt_data_ordered: mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; break; case Opt_data_writeback: mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; break; case Opt_user_xattr: mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; break; case Opt_nouser_xattr: mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; break; case Opt_atime_quantum: if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option >= 0) mopt->atime_quantum = option; break; case Opt_slot: if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option) mopt->slot = (u16)option; break; case Opt_commit: if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option < 0) return 0; if (option == 0) option = JBD2_DEFAULT_MAX_COMMIT_AGE; mopt->commit_interval = HZ * option; break; case Opt_localalloc: if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option >= 0) mopt->localalloc_opt = option; break; case Opt_localflocks: /* * Changing this during remount could race * flock() requests, or "unbalance" existing * ones (e.g., a lock is taken in one mode but * dropped in the other). If users care enough * to flip locking modes during remount, we * could add a "local" flag to individual * flock structures for proper tracking of * state. */ if (!is_remount) mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; break; case Opt_stack: /* Check both that the option we were passed * is of the right length and that it is a proper * string of the right length. */ if (((args[0].to - args[0].from) != OCFS2_STACK_LABEL_LEN) || (strnlen(args[0].from, OCFS2_STACK_LABEL_LEN) != OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, "Invalid cluster_stack option\n"); status = 0; goto bail; } memcpy(mopt->cluster_stack, args[0].from, OCFS2_STACK_LABEL_LEN); mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; /* * Open code the memcmp here as we don't have * an osb to pass to * ocfs2_userspace_stack(). */ if (memcmp(mopt->cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, OCFS2_STACK_LABEL_LEN)) user_stack = 1; break; case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; case Opt_usrquota: mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; break; case Opt_grpquota: mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; break; case Opt_coherency_buffered: mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; break; case Opt_coherency_full: mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; break; case Opt_acl: mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; break; case Opt_noacl: mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; break; case Opt_resv_level: if (is_remount) break; if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option >= OCFS2_MIN_RESV_LEVEL && option < OCFS2_MAX_RESV_LEVEL) mopt->resv_level = option; break; case Opt_dir_resv_level: if (is_remount) break; if (match_int(&args[0], &option)) { status = 0; goto bail; } if (option >= OCFS2_MIN_RESV_LEVEL && option < OCFS2_MAX_RESV_LEVEL) mopt->dir_resv_level = option; break; case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " "or missing value\n", p); status = 0; goto bail; } } if (user_stack == 0) { /* Ensure only one heartbeat mode */ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE); if (hweight32(tmp) != 1) { mlog(ML_ERROR, "Invalid heartbeat mount options\n"); status = 0; goto bail; } } status = 1; bail: return status; } static int ocfs2_show_options(struct seq_file *s, struct dentry *root) { struct ocfs2_super *osb = OCFS2_SB(root->d_sb); unsigned long opts = osb->s_mount_opt; unsigned int local_alloc_megs; if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { seq_printf(s, ",_netdev"); if (opts & OCFS2_MOUNT_HB_LOCAL) seq_printf(s, ",%s", OCFS2_HB_LOCAL); else seq_printf(s, ",%s", OCFS2_HB_GLOBAL); } else seq_printf(s, ",%s", OCFS2_HB_NONE); if (opts & OCFS2_MOUNT_NOINTR) seq_printf(s, ",nointr"); if (opts & OCFS2_MOUNT_DATA_WRITEBACK) seq_printf(s, ",data=writeback"); else seq_printf(s, ",data=ordered"); if (opts & OCFS2_MOUNT_BARRIER) seq_printf(s, ",barrier=1"); if (opts & OCFS2_MOUNT_ERRORS_PANIC) seq_printf(s, ",errors=panic"); else if (opts & OCFS2_MOUNT_ERRORS_CONT) seq_printf(s, ",errors=continue"); else seq_printf(s, ",errors=remount-ro"); if (osb->preferred_slot != OCFS2_INVALID_SLOT) seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); if (osb->osb_commit_interval) seq_printf(s, ",commit=%u", (unsigned) (osb->osb_commit_interval / HZ)); local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); if (local_alloc_megs != ocfs2_la_default_mb(osb)) seq_printf(s, ",localalloc=%d", local_alloc_megs); if (opts & OCFS2_MOUNT_LOCALFLOCKS) seq_printf(s, ",localflocks,"); if (osb->osb_cluster_stack[0]) seq_show_option(s, "cluster_stack", osb->osb_cluster_stack); if (opts & OCFS2_MOUNT_USRQUOTA) seq_printf(s, ",usrquota"); if (opts & OCFS2_MOUNT_GRPQUOTA) seq_printf(s, ",grpquota"); if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) seq_printf(s, ",coherency=buffered"); else seq_printf(s, ",coherency=full"); if (opts & OCFS2_MOUNT_NOUSERXATTR) seq_printf(s, ",nouser_xattr"); else seq_printf(s, ",user_xattr"); if (opts & OCFS2_MOUNT_INODE64) seq_printf(s, ",inode64"); if (opts & OCFS2_MOUNT_POSIX_ACL) seq_printf(s, ",acl"); else seq_printf(s, ",noacl"); if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL) seq_printf(s, ",resv_level=%d", osb->osb_resv_level); if (osb->osb_dir_resv_level != osb->osb_resv_level) seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); return 0; } static int __init ocfs2_init(void) { int status; status = init_ocfs2_uptodate_cache(); if (status < 0) goto out1; status = ocfs2_initialize_mem_caches(); if (status < 0) goto out2; ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); ocfs2_set_locking_protocol(); status = register_quota_format(&ocfs2_quota_format); if (status < 0) goto out3; status = register_filesystem(&ocfs2_fs_type); if (!status) return 0; unregister_quota_format(&ocfs2_quota_format); out3: debugfs_remove(ocfs2_debugfs_root); ocfs2_free_mem_caches(); out2: exit_ocfs2_uptodate_cache(); out1: mlog_errno(status); return status; } static void __exit ocfs2_exit(void) { unregister_quota_format(&ocfs2_quota_format); debugfs_remove(ocfs2_debugfs_root); ocfs2_free_mem_caches(); unregister_filesystem(&ocfs2_fs_type); exit_ocfs2_uptodate_cache(); } static void ocfs2_put_super(struct super_block *sb) { trace_ocfs2_put_super(sb); ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); } static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ocfs2_super *osb; u32 numbits, freebits; int status; struct ocfs2_dinode *bm_lock; struct buffer_head *bh = NULL; struct inode *inode = NULL; trace_ocfs2_statfs(dentry->d_sb, buf); osb = OCFS2_SB(dentry->d_sb); inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!inode) { mlog(ML_ERROR, "failed to get bitmap inode\n"); status = -EIO; goto bail; } status = ocfs2_inode_lock(inode, &bh, 0); if (status < 0) { mlog_errno(status); goto bail; } bm_lock = (struct ocfs2_dinode *) bh->b_data; numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total); freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); buf->f_type = OCFS2_SUPER_MAGIC; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = OCFS2_MAX_FILENAME_LEN; buf->f_blocks = ((sector_t) numbits) * (osb->s_clustersize >> osb->sb->s_blocksize_bits); buf->f_bfree = ((sector_t) freebits) * (osb->s_clustersize >> osb->sb->s_blocksize_bits); buf->f_bavail = buf->f_bfree; buf->f_files = numbits; buf->f_ffree = freebits; buf->f_fsid.val[0] = crc32_le(0, osb->uuid_str, OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL; buf->f_fsid.val[1] = crc32_le(0, osb->uuid_str + OCFS2_VOL_UUID_LEN, OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL; brelse(bh); ocfs2_inode_unlock(inode, 0); status = 0; bail: iput(inode); if (status) mlog_errno(status); return status; } static void ocfs2_inode_init_once(void *data) { struct ocfs2_inode_info *oi = data; oi->ip_flags = 0; oi->ip_open_count = 0; spin_lock_init(&oi->ip_lock); ocfs2_extent_map_init(&oi->vfs_inode); INIT_LIST_HEAD(&oi->ip_io_markers); INIT_LIST_HEAD(&oi->ip_unwritten_list); oi->ip_dir_start_lookup = 0; init_rwsem(&oi->ip_alloc_sem); init_rwsem(&oi->ip_xattr_sem); mutex_init(&oi->ip_io_mutex); oi->ip_blkno = 0ULL; oi->ip_clusters = 0; oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); ocfs2_lock_res_init_once(&oi->ip_rw_lockres); ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres); ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), &ocfs2_inode_caching_ops); inode_init_once(&oi->vfs_inode); } static int ocfs2_initialize_mem_caches(void) { ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", sizeof(struct ocfs2_inode_info), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT), ocfs2_inode_init_once); ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", sizeof(struct ocfs2_dquot), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), NULL); ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache", sizeof(struct ocfs2_quota_chunk), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), NULL); if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || !ocfs2_qf_chunk_cachep) { kmem_cache_destroy(ocfs2_inode_cachep); kmem_cache_destroy(ocfs2_dquot_cachep); kmem_cache_destroy(ocfs2_qf_chunk_cachep); return -ENOMEM; } return 0; } static void ocfs2_free_mem_caches(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; kmem_cache_destroy(ocfs2_dquot_cachep); ocfs2_dquot_cachep = NULL; kmem_cache_destroy(ocfs2_qf_chunk_cachep); ocfs2_qf_chunk_cachep = NULL; } static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size) { if (!sb_set_blocksize(sb, sect_size)) { mlog(ML_ERROR, "unable to set blocksize\n"); return -EIO; } *bh = sb_getblk(sb, block); if (!*bh) { mlog_errno(-ENOMEM); return -ENOMEM; } lock_buffer(*bh); if (!buffer_dirty(*bh)) clear_buffer_uptodate(*bh); unlock_buffer(*bh); if (bh_read(*bh, 0) < 0) { mlog_errno(-EIO); brelse(*bh); *bh = NULL; return -EIO; } return 0; } static int ocfs2_mount_volume(struct super_block *sb) { int status = 0; struct ocfs2_super *osb = OCFS2_SB(sb); if (ocfs2_is_hard_readonly(osb)) goto out; mutex_init(&osb->obs_trim_fs_mutex); status = ocfs2_dlm_init(osb); if (status < 0) { mlog_errno(status); if (status == -EBADR && ocfs2_userspace_stack(osb)) mlog(ML_ERROR, "couldn't mount because cluster name on" " disk does not match the running cluster name.\n"); goto out; } status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); goto out_dlm; } /* This will load up the node map and add ourselves to it. */ status = ocfs2_find_slot(osb); if (status < 0) { mlog_errno(status); goto out_super_lock; } /* load all node-local system inodes */ status = ocfs2_init_local_system_inodes(osb); if (status < 0) { mlog_errno(status); goto out_super_lock; } status = ocfs2_check_volume(osb); if (status < 0) { mlog_errno(status); goto out_system_inodes; } status = ocfs2_truncate_log_init(osb); if (status < 0) { mlog_errno(status); goto out_check_volume; } ocfs2_super_unlock(osb, 1); return 0; out_check_volume: ocfs2_free_replay_slots(osb); out_system_inodes: if (osb->local_alloc_state == OCFS2_LA_ENABLED) ocfs2_shutdown_local_alloc(osb); ocfs2_release_system_inodes(osb); /* before journal shutdown, we should release slot_info */ ocfs2_free_slot_info(osb); ocfs2_journal_shutdown(osb); out_super_lock: ocfs2_super_unlock(osb, 1); out_dlm: ocfs2_dlm_shutdown(osb, 0); out: return status; } static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { int tmp, hangup_needed = 0; struct ocfs2_super *osb = NULL; char nodestr[12]; trace_ocfs2_dismount_volume(sb); BUG_ON(!sb); osb = OCFS2_SB(sb); BUG_ON(!osb); /* Remove file check sysfs related directores/files, * and wait for the pending file check operations */ ocfs2_filecheck_remove_sysfs(osb); kset_unregister(osb->osb_dev_kset); /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ WARN_ON(!llist_empty(&osb->dquot_drop_list)); /* Wait for worker to be done with the work structure in osb */ cancel_work_sync(&osb->dquot_drop_work); ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); ocfs2_sync_blockdev(sb); ocfs2_purge_refcount_trees(osb); /* No cluster connection means we've failed during mount, so skip * all the steps which depended on that to complete. */ if (osb->cconn) { tmp = ocfs2_super_lock(osb, 1); if (tmp < 0) { mlog_errno(tmp); return; } } if (osb->slot_num != OCFS2_INVALID_SLOT) ocfs2_put_slot(osb); if (osb->cconn) ocfs2_super_unlock(osb, 1); ocfs2_release_system_inodes(osb); ocfs2_journal_shutdown(osb); /* * If we're dismounting due to mount error, mount.ocfs2 will clean * up heartbeat. If we're a local mount, there is no heartbeat. * If we failed before we got a uuid_str yet, we can't stop * heartbeat. Otherwise, do it. */ if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str && !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; ocfs2_dlm_shutdown(osb, hangup_needed); ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove_recursive(osb->osb_debug_root); if (hangup_needed) ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", osb->dev_str, nodestr); ocfs2_delete_osb(osb); kfree(osb); sb->s_dev = 0; sb->s_fs_info = NULL; } static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uuid, unsigned uuid_bytes) { int i, ret; char *ptr; BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN); osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL); if (osb->uuid_str == NULL) return -ENOMEM; for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { /* print with null */ ret = snprintf(ptr, 3, "%02X", uuid[i]); if (ret != 2) /* drop super cleans up */ return -EINVAL; /* then only advance past the last char */ ptr += 2; } return 0; } /* Make sure entire volume is addressable by our journal. Requires osb_clusters_at_boot to be valid and for the journal to have been initialized by ocfs2_journal_init(). */ static int ocfs2_journal_addressable(struct ocfs2_super *osb) { int status = 0; u64 max_block = ocfs2_clusters_to_blocks(osb->sb, osb->osb_clusters_at_boot) - 1; /* 32-bit block number is always OK. */ if (max_block <= (u32)~0ULL) goto out; /* Volume is "huge", so see if our journal is new enough to support it. */ if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_JBD2_SB) && jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT))) { mlog(ML_ERROR, "The journal cannot address the entire volume. " "Enable the 'block64' journal option with tunefs.ocfs2"); status = -EFBIG; goto out; } out: return status; } static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, int sector_size, struct ocfs2_blockcheck_stats *stats) { int status; int i, cbits, bbits; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; struct ocfs2_super *osb; u64 total_blocks; osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); if (!osb) { status = -ENOMEM; mlog_errno(status); goto out; } sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; sb->s_d_op = &ocfs2_dentry_ops; sb->s_export_op = &ocfs2_export_ops; sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->dq_op = &ocfs2_quota_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= SB_NOATIME; /* this is needed to support O_LARGEFILE */ cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); memcpy(&sb->s_uuid, di->id2.i_super.s_uuid, sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; for (i = 0; i < 3; i++) osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]); osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash); osb->sb = sb; osb->s_sectsize_bits = blksize_bits(sector_size); BUG_ON(!osb->s_sectsize_bits); spin_lock_init(&osb->dc_task_lock); init_waitqueue_head(&osb->dc_event); osb->dc_work_sequence = 0; osb->dc_wake_sequence = 0; INIT_LIST_HEAD(&osb->blocked_lock_list); osb->blocked_lock_count = 0; spin_lock_init(&osb->osb_lock); spin_lock_init(&osb->osb_xattr_lock); ocfs2_init_steal_slots(osb); mutex_init(&osb->system_file_mutex); atomic_set(&osb->alloc_stats.moves, 0); atomic_set(&osb->alloc_stats.local_data, 0); atomic_set(&osb->alloc_stats.bitmap_data, 0); atomic_set(&osb->alloc_stats.bg_allocs, 0); atomic_set(&osb->alloc_stats.bg_extends, 0); /* Copy the blockcheck stats from the superblock probe */ osb->osb_ecc_stats = *stats; ocfs2_init_node_maps(osb); snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { mlog(ML_ERROR, "Invalid number of node slots (%u)\n", osb->max_slots); status = -EINVAL; goto out; } ocfs2_orphan_scan_init(osb); status = ocfs2_recovery_init(osb); if (status) { mlog(ML_ERROR, "Unable to initialize recovery state\n"); mlog_errno(status); goto out; } init_waitqueue_head(&osb->checkpoint_event); osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; osb->slot_num = OCFS2_INVALID_SLOT; osb->s_xattr_inline_size = le16_to_cpu( di->id2.i_super.s_xattr_inline_size); osb->local_alloc_state = OCFS2_LA_UNUSED; osb->local_alloc_bh = NULL; INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); init_waitqueue_head(&osb->osb_mount_event); ocfs2_resmap_init(osb, &osb->osb_la_resmap); osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); if (!osb->vol_label) { mlog(ML_ERROR, "unable to alloc vol label\n"); status = -ENOMEM; goto out_recovery_map; } osb->slot_recovery_generations = kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), GFP_KERNEL); if (!osb->slot_recovery_generations) { status = -ENOMEM; mlog_errno(status); goto out_vol_label; } init_waitqueue_head(&osb->osb_wipe_event); osb->osb_orphan_wipes = kcalloc(osb->max_slots, sizeof(*osb->osb_orphan_wipes), GFP_KERNEL); if (!osb->osb_orphan_wipes) { status = -ENOMEM; mlog_errno(status); goto out_slot_recovery_gen; } osb->osb_rf_lock_tree = RB_ROOT; osb->s_feature_compat = le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); osb->s_feature_ro_compat = le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat); osb->s_feature_incompat = le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat); if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) { mlog(ML_ERROR, "couldn't mount because of unsupported " "optional features (%x).\n", i); status = -EINVAL; goto out_orphan_wipes; } if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { mlog(ML_ERROR, "couldn't mount RDWR because of " "unsupported optional features (%x).\n", i); status = -EINVAL; goto out_orphan_wipes; } if (ocfs2_clusterinfo_valid(osb)) { /* * ci_stack and ci_cluster in ocfs2_cluster_info may not be null * terminated, so make sure no overflow happens here by using * memcpy. Destination strings will always be null terminated * because osb is allocated using kzalloc. */ osb->osb_stackflags = OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, OCFS2_STACK_LABEL_LEN); if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { mlog(ML_ERROR, "couldn't mount because of an invalid " "cluster stack label (%s) \n", osb->osb_cluster_stack); status = -EINVAL; goto out_orphan_wipes; } memcpy(osb->osb_cluster_name, OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, OCFS2_CLUSTER_NAME_LEN); } else { /* The empty string is identical with classic tools that * don't know about s_cluster_info. */ osb->osb_cluster_stack[0] = '\0'; } get_random_bytes(&osb->s_next_generation, sizeof(u32)); /* * FIXME * This should be done in ocfs2_journal_init(), but any inode * writes back operation will cause the filesystem to crash. */ status = ocfs2_journal_alloc(osb); if (status < 0) goto out_orphan_wipes; INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); init_llist_head(&osb->dquot_drop_list); /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); osb->s_clustersize = 1 << osb->s_clustersize_bits; if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n", osb->s_clustersize); status = -EINVAL; goto out_journal; } total_blocks = ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters)); status = generic_check_addressable(osb->sb->s_blocksize_bits, total_blocks); if (status) { mlog(ML_ERROR, "Volume too large " "to mount safely on this system"); status = -EFBIG; goto out_journal; } if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, sizeof(di->id2.i_super.s_uuid))) { mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); status = -ENOMEM; goto out_journal; } strscpy(osb->vol_label, di->id2.i_super.s_label, OCFS2_MAX_VOL_LABEL_LEN); osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.s_first_cluster_group); osb->fs_generation = le32_to_cpu(di->i_fs_generation); osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str, (unsigned long long)osb->root_blkno, (unsigned long long)osb->system_dir_blkno, osb->s_clustersize_bits); osb->osb_dlm_debug = ocfs2_new_dlm_debug(); if (!osb->osb_dlm_debug) { status = -ENOMEM; mlog_errno(status); goto out_uuid_str; } atomic_set(&osb->vol_state, VOLUME_INIT); /* load root, system_dir, and all global system inodes */ status = ocfs2_init_global_system_inodes(osb); if (status < 0) { mlog_errno(status); goto out_dlm_out; } /* * global bitmap */ inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!inode) { status = -EINVAL; mlog_errno(status); goto out_system_inodes; } osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters; iput(inode); osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat) * 8; status = ocfs2_init_slot_info(osb); if (status < 0) { mlog_errno(status); goto out_system_inodes; } osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM); if (!osb->ocfs2_wq) { status = -ENOMEM; mlog_errno(status); goto out_slot_info; } return status; out_slot_info: ocfs2_free_slot_info(osb); out_system_inodes: ocfs2_release_system_inodes(osb); out_dlm_out: ocfs2_put_dlm_debug(osb->osb_dlm_debug); out_uuid_str: kfree(osb->uuid_str); out_journal: kfree(osb->journal); out_orphan_wipes: kfree(osb->osb_orphan_wipes); out_slot_recovery_gen: kfree(osb->slot_recovery_generations); out_vol_label: kfree(osb->vol_label); out_recovery_map: kfree(osb->recovery_map); out: kfree(osb); sb->s_fs_info = NULL; return status; } /* * will return: -EAGAIN if it is ok to keep searching for superblocks * -EINVAL if there is a bad superblock * 0 on success */ static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, u32 blksz, struct ocfs2_blockcheck_stats *stats) { int status = -EAGAIN; if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { /* We have to do a raw check of the feature here */ if (le32_to_cpu(di->id2.i_super.s_feature_incompat) & OCFS2_FEATURE_INCOMPAT_META_ECC) { status = ocfs2_block_check_validate(bh->b_data, bh->b_size, &di->i_check, stats); if (status) goto out; } status = -EINVAL; if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { mlog(ML_ERROR, "found superblock with incorrect block " "size: found %u, should be %u\n", 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits), blksz); } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != OCFS2_MAJOR_REV_LEVEL || le16_to_cpu(di->id2.i_super.s_minor_rev_level) != OCFS2_MINOR_REV_LEVEL) { mlog(ML_ERROR, "found superblock with bad version: " "found %u.%u, should be %u.%u\n", le16_to_cpu(di->id2.i_super.s_major_rev_level), le16_to_cpu(di->id2.i_super.s_minor_rev_level), OCFS2_MAJOR_REV_LEVEL, OCFS2_MINOR_REV_LEVEL); } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { mlog(ML_ERROR, "bad block number on superblock: " "found %llu, should be %llu\n", (unsigned long long)le64_to_cpu(di->i_blkno), (unsigned long long)bh->b_blocknr); } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { mlog(ML_ERROR, "bad cluster size found: %u\n", 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)); } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { mlog(ML_ERROR, "bad root_blkno: 0\n"); } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { mlog(ML_ERROR, "bad system_dir_blkno: 0\n"); } else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) { mlog(ML_ERROR, "Superblock slots found greater than file system " "maximum: found %u, max %u\n", le16_to_cpu(di->id2.i_super.s_max_slots), OCFS2_MAX_SLOTS); } else { /* found it! */ status = 0; } } out: if (status && status != -EAGAIN) mlog_errno(status); return status; } static int ocfs2_check_volume(struct ocfs2_super *osb) { int status; int dirty; int local; struct ocfs2_dinode *local_alloc = NULL; /* only used if we * recover * ourselves. */ /* Init our journal object. */ status = ocfs2_journal_init(osb, &dirty); if (status < 0) { mlog(ML_ERROR, "Could not initialize journal!\n"); goto finally; } /* Now that journal has been initialized, check to make sure entire volume is addressable. */ status = ocfs2_journal_addressable(osb); if (status) goto finally; /* If the journal was unmounted cleanly then we don't want to * recover anything. Otherwise, journal_load will do that * dirty work for us :) */ if (!dirty) { status = ocfs2_journal_wipe(osb->journal, 0); if (status < 0) { mlog_errno(status); goto finally; } } else { printk(KERN_NOTICE "ocfs2: File system on device (%s) was not " "unmounted cleanly, recovering it.\n", osb->dev_str); } local = ocfs2_mount_local(osb); /* will play back anything left in the journal. */ status = ocfs2_journal_load(osb->journal, local, dirty); if (status < 0) { mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); goto finally; } if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) jbd2_journal_set_features(osb->journal->j_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); else jbd2_journal_clear_features(osb->journal->j_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); if (dirty) { /* recover my local alloc if we didn't unmount cleanly. */ status = ocfs2_begin_local_alloc_recovery(osb, osb->slot_num, &local_alloc); if (status < 0) { mlog_errno(status); goto finally; } /* we complete the recovery process after we've marked * ourselves as mounted. */ } status = ocfs2_load_local_alloc(osb); if (status < 0) { mlog_errno(status); goto finally; } if (dirty) { /* Recovery will be completed after we've mounted the * rest of the volume. */ osb->local_alloc_copy = local_alloc; local_alloc = NULL; } /* go through each journal, trylock it and if you get the * lock, and it's marked as dirty, set the bit in the recover * map and launch a recovery thread for it. */ status = ocfs2_mark_dead_nodes(osb); if (status < 0) { mlog_errno(status); goto finally; } status = ocfs2_compute_replay_slots(osb); if (status < 0) mlog_errno(status); finally: kfree(local_alloc); if (status) mlog_errno(status); return status; } /* * The routine gets called from dismount or close whenever a dismount on * volume is requested and the osb open count becomes 1. * It will remove the osb from the global list and also free up all the * initialized resources and fileobject. */ static void ocfs2_delete_osb(struct ocfs2_super *osb) { /* This function assumes that the caller has the main osb resource */ /* ocfs2_initializer_super have already created this workqueue */ if (osb->ocfs2_wq) destroy_workqueue(osb->ocfs2_wq); ocfs2_free_slot_info(osb); kfree(osb->osb_orphan_wipes); kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to * allocate osb->journal at the middle of ocfs2_initialize_super(), * we free it here. */ kfree(osb->journal); kfree(osb->local_alloc_copy); kfree(osb->uuid_str); kfree(osb->vol_label); ocfs2_put_dlm_debug(osb->osb_dlm_debug); memset(osb, 0, sizeof(struct ocfs2_super)); } /* Depending on the mount option passed, perform one of the following: * Put OCFS2 into a readonly state (default) * Return EIO so that only the process errs * Fix the error as if fsck.ocfs2 -y * panic */ static int ocfs2_handle_error(struct super_block *sb) { struct ocfs2_super *osb = OCFS2_SB(sb); int rv = 0; ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS); pr_crit("On-disk corruption discovered. " "Please run fsck.ocfs2 once the filesystem is unmounted.\n"); if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) { panic("OCFS2: (device %s): panic forced after error\n", sb->s_id); } else if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_CONT) { pr_crit("OCFS2: Returning error to the calling process.\n"); rv = -EIO; } else { /* default option */ rv = -EROFS; if (sb_rdonly(sb) && (ocfs2_is_soft_readonly(osb) || ocfs2_is_hard_readonly(osb))) return rv; pr_crit("OCFS2: File system is now read-only.\n"); sb->s_flags |= SB_RDONLY; ocfs2_set_ro_flag(osb, 0); } return rv; } int __ocfs2_error(struct super_block *sb, const char *function, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; /* Not using mlog here because we want to show the actual * function the error came from. */ printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV", sb->s_id, function, &vaf); va_end(args); return ocfs2_handle_error(sb); } /* Handle critical errors. This is intentionally more drastic than * ocfs2_handle_error, so we only use for things like journal errors, * etc. */ void __ocfs2_abort(struct super_block *sb, const char *function, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV", sb->s_id, function, &vaf); va_end(args); /* We don't have the cluster support yet to go straight to * hard readonly in here. Until then, we want to keep * ocfs2_abort() so that we can at least mark critical * errors. * * TODO: This should abort the journal and alert other nodes * that our slot needs recovery. */ /* Force a panic(). This stinks, but it's better than letting * things continue without having a proper hard readonly * here. */ if (!ocfs2_mount_local(OCFS2_SB(sb))) OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; ocfs2_handle_error(sb); } /* * Void signal blockers, because in-kernel sigprocmask() only fails * when SIG_* is wrong. */ void ocfs2_block_signals(sigset_t *oldset) { int rc; sigset_t blocked; sigfillset(&blocked); rc = sigprocmask(SIG_BLOCK, &blocked, oldset); BUG_ON(rc); } void ocfs2_unblock_signals(sigset_t *oldset) { int rc = sigprocmask(SIG_SETMASK, oldset, NULL); BUG_ON(rc); } module_init(ocfs2_init); module_exit(ocfs2_exit); |
10 10 1 1 4 10 10 7 11 4 2 2 1 2 7 7 11 17 17 10 6 11 6 1 2 2 1 5 2 1 22 22 11 4 14 3 15 9 9 11 4 5 13 1 8 4 8 4 8 4 8 4 8 4 8 4 8 8 6 6 136 227 1 1 3 1 2 227 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: IP/IP protocol decoder modified to support * virtual tunnel interface * * Authors: * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012 */ /* This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c For comments look at net/ipv4/ip_gre.c --ANK */ #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> #include <linux/icmpv6.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> static struct rtnl_link_ops vti_link_ops __read_mostly; static unsigned int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type, bool update_skb_dev) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, vti_net_id); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; if (update_skb_dev) skb->dev = tunnel->dev; return xfrm_input(skb, nexthdr, spi, encap_type); } return -EINVAL; drop: kfree_skb(skb); return 0; } static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { return vti_input(skb, nexthdr, spi, encap_type, false); } static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev); } static int vti_rcv_proto(struct sk_buff *skb) { return vti_rcv(skb, 0, false); } static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; struct net_device *dev; struct xfrm_state *x; const struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; if (!tunnel) return 1; dev = tunnel->dev; if (err) { DEV_STATS_INC(dev, rx_errors); DEV_STATS_INC(dev, rx_dropped); return 0; } x = xfrm_input_state(skb); inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); if (inner_mode == NULL) { XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINSTATEMODEERROR); return -EINVAL; } } family = inner_mode->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); skb->mark = orig_mark; if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); return 0; } static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src) { xfrm_address_t *daddr = (xfrm_address_t *)&dst; xfrm_address_t *saddr = (xfrm_address_t *)&src; /* if there is no transform then this tunnel is not functional. * Or if the xfrm is not mode tunnel. */ if (!x || x->props.mode != XFRM_MODE_TUNNEL || x->props.family != AF_INET) return false; if (!dst) return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET); if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET)) return false; return true; } static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_parm *parms = &tunnel->parms; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int pkt_len = skb->len; int err; int mtu; if (!dst) { switch (skb->protocol) { case htons(ETH_P_IP): { struct rtable *rt; fl->u.ip4.flowi4_oif = dev->ifindex; fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); if (IS_ERR(rt)) { DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } dst = &rt->dst; skb_dst_set(skb, dst); break; } #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): fl->u.ip6.flowi6_oif = dev->ifindex; fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); if (dst->error) { dst_release(dst); dst = NULL; DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } skb_dst_set(skb, dst); break; #endif default: DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } } dst_hold(dst); dst = xfrm_lookup_route(tunnel->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { DEV_STATS_INC(dev, tx_carrier_errors); goto tx_error_icmp; } if (dst->flags & DST_XFRM_QUEUE) goto xmit; if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { DEV_STATS_INC(dev, tx_carrier_errors); dst_release(dst); goto tx_error_icmp; } tdev = dst->dev; if (tdev == dev) { dst_release(dst); DEV_STATS_INC(dev, collisions); goto tx_error; } mtu = dst_mtu(dst); if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } else { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } dst_release(dst); goto tx_error; } xmit: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = pkt_len; iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return NETDEV_TX_OK; } /* This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. */ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct flowi fl; if (!pskb_inet_may_pull(skb)) goto tx_err; memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; default: goto tx_err; } /* override mark with tunnel output key */ fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key); return vti_xmit(skb, dev, &fl); tx_err: DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return NETDEV_TX_OK; } static int vti4_err(struct sk_buff *skb, u32 info) { __be32 spi; __u32 mark; struct xfrm_state *x; struct ip_tunnel *tunnel; struct ip_esp_hdr *esph; struct ip_auth_hdr *ah ; struct ip_comp_hdr *ipch; struct net *net = dev_net(skb->dev); const struct iphdr *iph = (const struct iphdr *)skb->data; int protocol = iph->protocol; struct ip_tunnel_net *itn = net_generic(net, vti_net_id); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); if (!tunnel) return -1; mark = be32_to_cpu(tunnel->parms.o_key); switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); spi = esph->spi; break; case IPPROTO_AH: ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); spi = ah->spi; break; case IPPROTO_COMP: ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); spi = htonl(ntohs(ipch->cpi)); break; default: return 0; } switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return 0; break; case ICMP_REDIRECT: break; default: return 0; } x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET); if (!x) return 0; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, protocol); else ipv4_redirect(skb, net, 0, protocol); xfrm_state_put(x); return 0; } static int vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP || p->iph.ihl != 5) return -EINVAL; } if (!(p->i_flags & GRE_KEY)) p->i_key = 0; if (!(p->o_flags & GRE_KEY)) p->o_key = 0; p->i_flags = VTI_ISVTI; err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { p->i_flags |= GRE_KEY; p->o_flags |= GRE_KEY; } return 0; } static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, .ndo_siocdevprivate = ip_tunnel_siocdevprivate, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = vti_tunnel_ctl, }; static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } static int vti_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); return ip_tunnel_init(dev); } static void __net_init vti_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; } static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { .handler = vti_rcv_proto, .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { .handler = vti_rcv_proto, .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .handler = vti_rcv_proto, .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) static int vti_rcv_tunnel(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); return vti_input(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr, 0, false); } static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .handler = vti_rcv_tunnel, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 0, }; #if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .handler = vti_rcv_tunnel, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 0, }; #endif #endif static int __net_init vti_init_net(struct net *net) { int err; struct ip_tunnel_net *itn; err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0"); if (err) return err; itn = net_generic(net, vti_net_id); if (itn->fb_tunnel_dev) vti_fb_tunnel_init(itn->fb_tunnel_dev); return 0; } static void __net_exit vti_exit_batch_net(struct list_head *list_net) { ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { return 0; } static void vti_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); parms->iph.protocol = IPPROTO_IPIP; if (!data) return; parms->i_flags = VTI_ISVTI; if (data[IFLA_VTI_LINK]) parms->link = nla_get_u32(data[IFLA_VTI_LINK]); if (data[IFLA_VTI_IKEY]) parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); if (data[IFLA_VTI_OKEY]) parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); if (data[IFLA_VTI_LOCAL]) parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]); if (data[IFLA_VTI_FWMARK]) *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } static int vti_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel_parm parms; __u32 fwmark = 0; vti_netlink_parms(data, &parms, &fwmark); return ip_tunnel_newlink(dev, tb, &parms, fwmark); } static int vti_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); __u32 fwmark = t->fwmark; struct ip_tunnel_parm p; vti_netlink_parms(data, &p, &fwmark); return ip_tunnel_changelink(dev, tb, &p, fwmark); } static size_t vti_get_size(const struct net_device *dev) { return /* IFLA_VTI_LINK */ nla_total_size(4) + /* IFLA_VTI_IKEY */ nla_total_size(4) + /* IFLA_VTI_OKEY */ nla_total_size(4) + /* IFLA_VTI_LOCAL */ nla_total_size(4) + /* IFLA_VTI_REMOTE */ nla_total_size(4) + /* IFLA_VTI_FWMARK */ nla_total_size(4) + 0; } static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) || nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) || nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) || nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark)) return -EMSGSIZE; return 0; } static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_LINK] = { .type = NLA_U32 }, [IFLA_VTI_IKEY] = { .type = NLA_U32 }, [IFLA_VTI_OKEY] = { .type = NLA_U32 }, [IFLA_VTI_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) }, [IFLA_VTI_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vti_link_ops __read_mostly = { .kind = "vti", .maxtype = IFLA_VTI_MAX, .policy = vti_policy, .priv_size = sizeof(struct ip_tunnel), .setup = vti_tunnel_setup, .validate = vti_tunnel_validate, .newlink = vti_newlink, .changelink = vti_changelink, .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) { const char *msg; int err; pr_info("IPv4 over IPsec tunneling driver\n"); msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) goto pernet_dev_failed; msg = "tunnel protocols"; err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP); if (err < 0) goto xfrm_proto_esp_failed; err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH); if (err < 0) goto xfrm_proto_ah_failed; err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; #if IS_ENABLED(CONFIG_IPV6) err = xfrm4_tunnel_register(&vti_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif #endif msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) goto rtnl_link_failed; return err; rtnl_link_failed: #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); xfrm_tunnel_ipip6_failed: #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); xfrm_tunnel_ipip_failed: #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: pr_err("vti init: failed to register %s\n", msg); return err; } static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); } module_init(vti_init); module_exit(vti_fini); MODULE_DESCRIPTION("Virtual (secure) IP tunneling library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("vti"); MODULE_ALIAS_NETDEV("ip_vti0"); |
15 15 15 15 15 15 21 21 21 21 21 21 21 21 21 21 21 21 442 442 21 15 15 15 15 15 21 21 21 21 21 21 21 21 21 15 21 442 442 416 416 21 21 21 21 21 22 22 23 22 7 21 21 21 15 15 15 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sunrpc/rpc_pipe.c * * Userland/kernel interface for rpcauth_gss. * Code shamelessly plagiarized from fs/nfsd/nfsctl.c * and fs/sysfs/inode.c * * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> * */ #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/namei.h> #include <linux/fsnotify.h> #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/utsname.h> #include <asm/ioctls.h> #include <linux/poll.h> #include <linux/wait.h> #include <linux/seq_file.h> #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/cache.h> #include <linux/nsproxy.h> #include <linux/notifier.h> #include "netns.h" #include "sunrpc.h" #define RPCDBG_FACILITY RPCDBG_DEBUG #define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "") static struct file_system_type rpc_pipe_fs_type; static const struct rpc_pipe_ops gssd_dummy_pipe_ops; static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list); int rpc_pipefs_notifier_register(struct notifier_block *nb) { return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register); void rpc_pipefs_notifier_unregister(struct notifier_block *nb) { blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb); } EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister); static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head, void (*destroy_msg)(struct rpc_pipe_msg *), int err) { struct rpc_pipe_msg *msg; if (list_empty(head)) return; do { msg = list_entry(head->next, struct rpc_pipe_msg, list); list_del_init(&msg->list); msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); if (waitq) wake_up(waitq); } static void rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); struct rpc_pipe *pipe = container_of(work, struct rpc_pipe, queue_timeout.work); void (*destroy_msg)(struct rpc_pipe_msg *); struct dentry *dentry; spin_lock(&pipe->lock); destroy_msg = pipe->ops->destroy_msg; if (pipe->nreaders == 0) { list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); rpc_purge_list(dentry ? &RPC_I(d_inode(dentry))->waitq : NULL, &free_list, destroy_msg, -ETIMEDOUT); dput(dentry); } ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, char __user *dst, size_t buflen) { char *data = (char *)msg->data + msg->copied; size_t mlen = min(msg->len - msg->copied, buflen); unsigned long left; left = copy_to_user(dst, data, mlen); if (left == mlen) { msg->errno = -EFAULT; return -EFAULT; } mlen -= left; msg->copied += mlen; msg->errno = 0; return mlen; } EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); /** * rpc_queue_upcall - queue an upcall message to userspace * @pipe: upcall pipe on which to queue given message * @msg: message to queue * * Call with an @inode created by rpc_mkpipe() to queue an upcall. * A userspace process may then later read the upcall by performing a * read on an open file for this inode. It is up to the caller to * initialize the fields of @msg (other than @msg->list) appropriately. */ int rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg) { int res = -EPIPE; struct dentry *dentry; spin_lock(&pipe->lock); if (pipe->nreaders) { list_add_tail(&msg->list, &pipe->pipe); pipe->pipelen += msg->len; res = 0; } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) { if (list_empty(&pipe->pipe)) queue_delayed_work(rpciod_workqueue, &pipe->queue_timeout, RPC_UPCALL_TIMEOUT); list_add_tail(&msg->list, &pipe->pipe); pipe->pipelen += msg->len; res = 0; } dentry = dget(pipe->dentry); spin_unlock(&pipe->lock); if (dentry) { wake_up(&RPC_I(d_inode(dentry))->waitq); dput(dentry); } return res; } EXPORT_SYMBOL_GPL(rpc_queue_upcall); static inline void rpc_inode_setowner(struct inode *inode, void *private) { RPC_I(inode)->private = private; } static void rpc_close_pipes(struct inode *inode) { struct rpc_pipe *pipe = RPC_I(inode)->pipe; int need_release; LIST_HEAD(free_list); inode_lock(inode); spin_lock(&pipe->lock); need_release = pipe->nreaders != 0 || pipe->nwriters != 0; pipe->nreaders = 0; list_splice_init(&pipe->in_upcall, &free_list); list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; pipe->dentry = NULL; spin_unlock(&pipe->lock); rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE); pipe->nwriters = 0; if (need_release && pipe->ops->release_pipe) pipe->ops->release_pipe(inode); cancel_delayed_work_sync(&pipe->queue_timeout); rpc_inode_setowner(inode, NULL); RPC_I(inode)->pipe = NULL; inode_unlock(inode); } static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; } static void rpc_free_inode(struct inode *inode) { kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } static int rpc_pipe_open(struct inode *inode, struct file *filp) { struct rpc_pipe *pipe; int first_open; int res = -ENXIO; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; first_open = pipe->nreaders == 0 && pipe->nwriters == 0; if (first_open && pipe->ops->open_pipe) { res = pipe->ops->open_pipe(inode); if (res) goto out; } if (filp->f_mode & FMODE_READ) pipe->nreaders++; if (filp->f_mode & FMODE_WRITE) pipe->nwriters++; res = 0; out: inode_unlock(inode); return res; } static int rpc_pipe_release(struct inode *inode, struct file *filp) { struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int last_close; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; msg = filp->private_data; if (msg != NULL) { spin_lock(&pipe->lock); msg->errno = -EAGAIN; list_del_init(&msg->list); spin_unlock(&pipe->lock); pipe->ops->destroy_msg(msg); } if (filp->f_mode & FMODE_WRITE) pipe->nwriters --; if (filp->f_mode & FMODE_READ) { pipe->nreaders --; if (pipe->nreaders == 0) { LIST_HEAD(free_list); spin_lock(&pipe->lock); list_splice_init(&pipe->pipe, &free_list); pipe->pipelen = 0; spin_unlock(&pipe->lock); rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EAGAIN); } } last_close = pipe->nwriters == 0 && pipe->nreaders == 0; if (last_close && pipe->ops->release_pipe) pipe->ops->release_pipe(inode); out: inode_unlock(inode); return 0; } static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) { res = -EPIPE; goto out_unlock; } msg = filp->private_data; if (msg == NULL) { spin_lock(&pipe->lock); if (!list_empty(&pipe->pipe)) { msg = list_entry(pipe->pipe.next, struct rpc_pipe_msg, list); list_move(&msg->list, &pipe->in_upcall); pipe->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; } spin_unlock(&pipe->lock); if (msg == NULL) goto out_unlock; } /* NOTE: it is up to the callback to update msg->copied */ res = pipe->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; spin_lock(&pipe->lock); list_del_init(&msg->list); spin_unlock(&pipe->lock); pipe->ops->destroy_msg(msg); } out_unlock: inode_unlock(inode); return res; } static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { struct inode *inode = file_inode(filp); int res; inode_lock(inode); res = -EPIPE; if (RPC_I(inode)->pipe != NULL) res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); inode_unlock(inode); return res; } static __poll_t rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); __poll_t mask = EPOLLOUT | EPOLLWRNORM; poll_wait(filp, &rpci->waitq, wait); inode_lock(inode); if (rpci->pipe == NULL) mask |= EPOLLERR | EPOLLHUP; else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) mask |= EPOLLIN | EPOLLRDNORM; inode_unlock(inode); return mask; } static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; int len; switch (cmd) { case FIONREAD: inode_lock(inode); pipe = RPC_I(inode)->pipe; if (pipe == NULL) { inode_unlock(inode); return -EPIPE; } spin_lock(&pipe->lock); len = pipe->pipelen; if (filp->private_data) { struct rpc_pipe_msg *msg; msg = filp->private_data; len += msg->len - msg->copied; } spin_unlock(&pipe->lock); inode_unlock(inode); return put_user(len, (int __user *)arg); default: return -EINVAL; } } static const struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = rpc_pipe_read, .write = rpc_pipe_write, .poll = rpc_pipe_poll, .unlocked_ioctl = rpc_pipe_ioctl, .open = rpc_pipe_open, .release = rpc_pipe_release, }; static int rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; rcu_read_lock(); seq_printf(m, "RPC server: %s\n", rcu_dereference(clnt->cl_xprt)->servername); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); rcu_read_unlock(); return 0; } static int rpc_info_open(struct inode *inode, struct file *file) { struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; spin_lock(&file->f_path.dentry->d_lock); if (!d_unhashed(file->f_path.dentry)) clnt = RPC_I(inode)->private; if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) { spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } } return ret; } static int rpc_info_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct rpc_clnt *clnt = (struct rpc_clnt *)m->private; if (clnt) rpc_release_client(clnt); return single_release(inode, file); } static const struct file_operations rpc_info_operations = { .owner = THIS_MODULE, .open = rpc_info_open, .read = seq_read, .llseek = seq_lseek, .release = rpc_info_release, }; /* * Description of fs contents. */ struct rpc_filelist { const char *name; const struct file_operations *i_fop; umode_t mode; }; static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) return NULL; inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); break; default: break; } return inode; } static int __rpc_create_common(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { struct inode *inode; d_drop(dentry); inode = rpc_get_inode(dir->i_sb, mode); if (!inode) goto out_err; inode->i_ino = iunique(dir->i_sb, 100); if (i_fop) inode->i_fop = i_fop; if (private) rpc_inode_setowner(inode, private); d_add(dentry, inode); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n", __FILE__, __func__, dentry); dput(dentry); return -ENOMEM; } static int __rpc_create(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { int err; err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); if (err) return err; fsnotify_create(dir, dentry); return 0; } static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private) { int err; err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); if (err) return err; inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; } static void init_pipe(struct rpc_pipe *pipe) { pipe->nreaders = 0; pipe->nwriters = 0; INIT_LIST_HEAD(&pipe->in_upcall); INIT_LIST_HEAD(&pipe->in_downcall); INIT_LIST_HEAD(&pipe->pipe); pipe->pipelen = 0; INIT_DELAYED_WORK(&pipe->queue_timeout, rpc_timeout_upcall_queue); pipe->ops = NULL; spin_lock_init(&pipe->lock); pipe->dentry = NULL; } void rpc_destroy_pipe_data(struct rpc_pipe *pipe) { kfree(pipe); } EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data); struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) { struct rpc_pipe *pipe; pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL); if (!pipe) return ERR_PTR(-ENOMEM); init_pipe(pipe); pipe->ops = ops; pipe->flags = flags; return pipe; } EXPORT_SYMBOL_GPL(rpc_mkpipe_data); static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *i_fop, void *private, struct rpc_pipe *pipe) { struct rpc_inode *rpci; int err; err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); if (err) return err; rpci = RPC_I(d_inode(dentry)); rpci->private = private; rpci->pipe = pipe; fsnotify_create(dir, dentry); return 0; } static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) { int ret; dget(dentry); ret = simple_rmdir(dir, dentry); d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); dput(dentry); return ret; } static int __rpc_unlink(struct inode *dir, struct dentry *dentry) { int ret; dget(dentry); ret = simple_unlink(dir, dentry); d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); dput(dentry); return ret; } static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); rpc_close_pipes(inode); return __rpc_unlink(dir, dentry); } static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, const char *name) { struct qstr q = QSTR_INIT(name, strlen(name)); struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { dentry = d_alloc(parent, &q); if (!dentry) return ERR_PTR(-ENOMEM); } if (d_really_is_negative(dentry)) return dentry; dput(dentry); return ERR_PTR(-EEXIST); } /* * FIXME: This probably has races. */ static void __rpc_depopulate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof) { struct inode *dir = d_inode(parent); struct dentry *dentry; struct qstr name; int i; for (i = start; i < eof; i++) { name.name = files[i].name; name.len = strlen(files[i].name); dentry = d_hash_and_lookup(parent, &name); if (dentry == NULL) continue; if (d_really_is_negative(dentry)) goto next; switch (d_inode(dentry)->i_mode & S_IFMT) { default: BUG(); case S_IFREG: __rpc_unlink(dir, dentry); break; case S_IFDIR: __rpc_rmdir(dir, dentry); } next: dput(dentry); } } static void rpc_depopulate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof) { struct inode *dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_CHILD); __rpc_depopulate(parent, files, start, eof); inode_unlock(dir); } static int rpc_populate(struct dentry *parent, const struct rpc_filelist *files, int start, int eof, void *private) { struct inode *dir = d_inode(parent); struct dentry *dentry; int i, err; inode_lock(dir); for (i = start; i < eof; i++) { dentry = __rpc_lookup_create_exclusive(parent, files[i].name); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_bad; switch (files[i].mode & S_IFMT) { default: BUG(); case S_IFREG: err = __rpc_create(dir, dentry, files[i].mode, files[i].i_fop, private); break; case S_IFDIR: err = __rpc_mkdir(dir, dentry, files[i].mode, NULL, private); } if (err != 0) goto out_bad; } inode_unlock(dir); return 0; out_bad: __rpc_depopulate(parent, files, start, eof); inode_unlock(dir); printk(KERN_WARNING "%s: %s failed to populate directory %pd\n", __FILE__, __func__, parent); return err; } static struct dentry *rpc_mkdir_populate(struct dentry *parent, const char *name, umode_t mode, void *private, int (*populate)(struct dentry *, void *), void *args_populate) { struct dentry *dentry; struct inode *dir = d_inode(parent); int error; inode_lock_nested(dir, I_MUTEX_PARENT); dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; error = __rpc_mkdir(dir, dentry, mode, NULL, private); if (error != 0) goto out_err; if (populate != NULL) { error = populate(dentry, args_populate); if (error) goto err_rmdir; } out: inode_unlock(dir); return dentry; err_rmdir: __rpc_rmdir(dir, dentry); out_err: dentry = ERR_PTR(error); goto out; } static int rpc_rmdir_depopulate(struct dentry *dentry, void (*depopulate)(struct dentry *)) { struct dentry *parent; struct inode *dir; int error; parent = dget_parent(dentry); dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_PARENT); if (depopulate != NULL) depopulate(dentry); error = __rpc_rmdir(dir, dentry); inode_unlock(dir); dput(parent); return error; } /** * rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace * communication * @parent: dentry of directory to create new "pipe" in * @name: name of pipe * @private: private data to associate with the pipe, for the caller's use * @pipe: &rpc_pipe containing input parameters * * Data is made available for userspace to read by calls to * rpc_queue_upcall(). The actual reads will result in calls to * @ops->upcall, which will be called with the file pointer, * message, and userspace buffer to copy to. * * Writes can come at any time, and do not necessarily have to be * responses to upcalls. They will result in calls to @msg->downcall. * * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file_inode(file))->private. */ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) { struct dentry *dentry; struct inode *dir = d_inode(parent); umode_t umode = S_IFIFO | 0600; int err; if (pipe->ops->upcall == NULL) umode &= ~0444; if (pipe->ops->downcall == NULL) umode &= ~0222; inode_lock_nested(dir, I_MUTEX_PARENT); dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, private, pipe); if (err) goto out_err; out: inode_unlock(dir); return dentry; out_err: dentry = ERR_PTR(err); printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n", __FILE__, __func__, parent, name, err); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); /** * rpc_unlink - remove a pipe * @dentry: dentry for the pipe, as returned from rpc_mkpipe * * After this call, lookups will no longer find the pipe, and any * attempts to read or write using preexisting opens of the pipe will * return -EPIPE. */ int rpc_unlink(struct dentry *dentry) { struct dentry *parent; struct inode *dir; int error = 0; parent = dget_parent(dentry); dir = d_inode(parent); inode_lock_nested(dir, I_MUTEX_PARENT); error = __rpc_rmpipe(dir, dentry); inode_unlock(dir); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); /** * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head * @pdh: pointer to struct rpc_pipe_dir_head */ void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh) { INIT_LIST_HEAD(&pdh->pdh_entries); pdh->pdh_dentry = NULL; } EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head); /** * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object * @pdo: pointer to struct rpc_pipe_dir_object * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops * @pdo_data: pointer to caller-defined data */ void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, const struct rpc_pipe_dir_object_ops *pdo_ops, void *pdo_data) { INIT_LIST_HEAD(&pdo->pdo_head); pdo->pdo_ops = pdo_ops; pdo->pdo_data = pdo_data; } EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object); static int rpc_add_pipe_dir_object_locked(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { int ret = 0; if (pdh->pdh_dentry) ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo); if (ret == 0) list_add_tail(&pdo->pdo_head, &pdh->pdh_entries); return ret; } static void rpc_remove_pipe_dir_object_locked(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { if (pdh->pdh_dentry) pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo); list_del_init(&pdo->pdo_head); } /** * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @pdo: pointer to struct rpc_pipe_dir_object * */ int rpc_add_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { int ret = 0; if (list_empty(&pdo->pdo_head)) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo); mutex_unlock(&sn->pipefs_sb_lock); } return ret; } EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object); /** * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @pdo: pointer to struct rpc_pipe_dir_object * */ void rpc_remove_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, struct rpc_pipe_dir_object *pdo) { if (!list_empty(&pdo->pdo_head)) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); rpc_remove_pipe_dir_object_locked(net, pdh, pdo); mutex_unlock(&sn->pipefs_sb_lock); } } EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object); /** * rpc_find_or_alloc_pipe_dir_object * @net: pointer to struct net * @pdh: pointer to struct rpc_pipe_dir_head * @match: match struct rpc_pipe_dir_object to data * @alloc: allocate a new struct rpc_pipe_dir_object * @data: user defined data for match() and alloc() * */ struct rpc_pipe_dir_object * rpc_find_or_alloc_pipe_dir_object(struct net *net, struct rpc_pipe_dir_head *pdh, int (*match)(struct rpc_pipe_dir_object *, void *), struct rpc_pipe_dir_object *(*alloc)(void *), void *data) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe_dir_object *pdo; mutex_lock(&sn->pipefs_sb_lock); list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) { if (!match(pdo, data)) continue; goto out; } pdo = alloc(data); if (!pdo) goto out; rpc_add_pipe_dir_object_locked(net, pdh, pdo); out: mutex_unlock(&sn->pipefs_sb_lock); return pdo; } EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object); static void rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) { struct rpc_pipe_dir_object *pdo; struct dentry *dir = pdh->pdh_dentry; list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) pdo->pdo_ops->create(dir, pdo); } static void rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) { struct rpc_pipe_dir_object *pdo; struct dentry *dir = pdh->pdh_dentry; list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) pdo->pdo_ops->destroy(dir, pdo); } enum { RPCAUTH_info, RPCAUTH_EOF }; static const struct rpc_filelist authfiles[] = { [RPCAUTH_info] = { .name = "info", .i_fop = &rpc_info_operations, .mode = S_IFREG | 0400, }, }; static int rpc_clntdir_populate(struct dentry *dentry, void *private) { return rpc_populate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF, private); } static void rpc_clntdir_depopulate(struct dentry *dentry) { rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); } /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs * @dentry: the parent of new directory * @name: the name of new directory * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with * @rpc_clnt, which will contain a file named "info" with some basic * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). */ struct dentry *rpc_create_client_dir(struct dentry *dentry, const char *name, struct rpc_clnt *rpc_client) { struct dentry *ret; ret = rpc_mkdir_populate(dentry, name, 0555, NULL, rpc_clntdir_populate, rpc_client); if (!IS_ERR(ret)) { rpc_client->cl_pipedir_objects.pdh_dentry = ret; rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); } return ret; } /** * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() * @rpc_client: rpc_client for the pipe */ int rpc_remove_client_dir(struct rpc_clnt *rpc_client) { struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry; if (dentry == NULL) return 0; rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); rpc_client->cl_pipedir_objects.pdh_dentry = NULL; return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } static const struct rpc_filelist cache_pipefs_files[3] = { [0] = { .name = "channel", .i_fop = &cache_file_operations_pipefs, .mode = S_IFREG | 0600, }, [1] = { .name = "content", .i_fop = &content_file_operations_pipefs, .mode = S_IFREG | 0400, }, [2] = { .name = "flush", .i_fop = &cache_flush_operations_pipefs, .mode = S_IFREG | 0600, }, }; static int rpc_cachedir_populate(struct dentry *dentry, void *private) { return rpc_populate(dentry, cache_pipefs_files, 0, 3, private); } static void rpc_cachedir_depopulate(struct dentry *dentry) { rpc_depopulate(dentry, cache_pipefs_files, 0, 3); } struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { return rpc_mkdir_populate(parent, name, umode, NULL, rpc_cachedir_populate, cd); } void rpc_remove_cache_dir(struct dentry *dentry) { rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); } /* * populate the filesystem */ static const struct super_operations s_ops = { .alloc_inode = rpc_alloc_inode, .free_inode = rpc_free_inode, .statfs = simple_statfs, }; #define RPCAUTH_GSSMAGIC 0x67596969 /* * We have a single directory with 1 node in it. */ enum { RPCAUTH_lockd, RPCAUTH_mount, RPCAUTH_nfs, RPCAUTH_portmap, RPCAUTH_statd, RPCAUTH_nfsd4_cb, RPCAUTH_cache, RPCAUTH_nfsd, RPCAUTH_gssd, RPCAUTH_RootEOF }; static const struct rpc_filelist files[] = { [RPCAUTH_lockd] = { .name = "lockd", .mode = S_IFDIR | 0555, }, [RPCAUTH_mount] = { .name = "mount", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfs] = { .name = "nfs", .mode = S_IFDIR | 0555, }, [RPCAUTH_portmap] = { .name = "portmap", .mode = S_IFDIR | 0555, }, [RPCAUTH_statd] = { .name = "statd", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd4_cb] = { .name = "nfsd4_cb", .mode = S_IFDIR | 0555, }, [RPCAUTH_cache] = { .name = "cache", .mode = S_IFDIR | 0555, }, [RPCAUTH_nfsd] = { .name = "nfsd", .mode = S_IFDIR | 0555, }, [RPCAUTH_gssd] = { .name = "gssd", .mode = S_IFDIR | 0555, }, }; /* * This call can be used only in RPC pipefs mount notification hooks. */ struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); return d_hash_and_lookup(sb->s_root, &dir); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); int rpc_pipefs_init_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); sn->gssd_dummy = rpc_mkpipe_data(&gssd_dummy_pipe_ops, 0); if (IS_ERR(sn->gssd_dummy)) return PTR_ERR(sn->gssd_dummy); mutex_init(&sn->pipefs_sb_lock); sn->pipe_version = -1; return 0; } void rpc_pipefs_exit_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); rpc_destroy_pipe_data(sn->gssd_dummy); } /* * This call will be used for per network namespace operations calls. * Note: Function will be returned with pipefs_sb_lock taken if superblock was * found. This lock have to be released by rpc_put_sb_net() when all operations * will be completed. */ struct super_block *rpc_get_sb_net(const struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); if (sn->pipefs_sb) return sn->pipefs_sb; mutex_unlock(&sn->pipefs_sb_lock); return NULL; } EXPORT_SYMBOL_GPL(rpc_get_sb_net); void rpc_put_sb_net(const struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); WARN_ON(sn->pipefs_sb == NULL); mutex_unlock(&sn->pipefs_sb_lock); } EXPORT_SYMBOL_GPL(rpc_put_sb_net); static const struct rpc_filelist gssd_dummy_clnt_dir[] = { [0] = { .name = "clntXX", .mode = S_IFDIR | 0555, }, }; static ssize_t dummy_downcall(struct file *filp, const char __user *src, size_t len) { return -EINVAL; } static const struct rpc_pipe_ops gssd_dummy_pipe_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = dummy_downcall, }; /* * Here we present a bogus "info" file to keep rpc.gssd happy. We don't expect * that it will ever use this info to handle an upcall, but rpc.gssd expects * that this file will be there and have a certain format. */ static int rpc_dummy_info_show(struct seq_file *m, void *v) { seq_printf(m, "RPC server: %s\n", utsname()->nodename); seq_printf(m, "service: foo (1) version 0\n"); seq_printf(m, "address: 127.0.0.1\n"); seq_printf(m, "protocol: tcp\n"); seq_printf(m, "port: 0\n"); return 0; } DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info); static const struct rpc_filelist gssd_dummy_info_file[] = { [0] = { .name = "info", .i_fop = &rpc_dummy_info_fops, .mode = S_IFREG | 0400, }, }; /** * rpc_gssd_dummy_populate - create a dummy gssd pipe * @root: root of the rpc_pipefs filesystem * @pipe_data: pipe data created when netns is initialized * * Create a dummy set of directories and a pipe that gssd can hold open to * indicate that it is up and running. */ static struct dentry * rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) { int ret = 0; struct dentry *gssd_dentry; struct dentry *clnt_dentry = NULL; struct dentry *pipe_dentry = NULL; struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name, strlen(files[RPCAUTH_gssd].name)); /* We should never get this far if "gssd" doesn't exist */ gssd_dentry = d_hash_and_lookup(root, &q); if (!gssd_dentry) return ERR_PTR(-ENOENT); ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL); if (ret) { pipe_dentry = ERR_PTR(ret); goto out; } q.name = gssd_dummy_clnt_dir[0].name; q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL); if (ret) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(ret); goto out; } pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data); if (IS_ERR(pipe_dentry)) { __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); } out: dput(clnt_dentry); dput(gssd_dentry); return pipe_dentry; } static void rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) { struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); dput(pipe_dentry); } static int rpc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dentry *root, *gssd_dentry; struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; sb->s_d_op = &simple_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | 0555); sb->s_root = root = d_make_root(inode); if (!root) return -ENOMEM; if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy); if (IS_ERR(gssd_dentry)) { __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); return PTR_ERR(gssd_dentry); } dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n", net->ns.inum, NET_NAME(net)); mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, sb); if (err) goto err_depopulate; mutex_unlock(&sn->pipefs_sb_lock); return 0; err_depopulate: rpc_gssd_dummy_depopulate(gssd_dentry); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); sn->pipefs_sb = NULL; __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); mutex_unlock(&sn->pipefs_sb_lock); return err; } bool gssd_running(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe = sn->gssd_dummy; return pipe->nreaders || pipe->nwriters; } EXPORT_SYMBOL_GPL(gssd_running); static int rpc_fs_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, rpc_fill_super, get_net(fc->net_ns)); } static void rpc_fs_free_fc(struct fs_context *fc) { if (fc->s_fs_info) put_net(fc->s_fs_info); } static const struct fs_context_operations rpc_fs_context_ops = { .free = rpc_fs_free_fc, .get_tree = rpc_fs_get_tree, }; static int rpc_init_fs_context(struct fs_context *fc) { put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(fc->net_ns->user_ns); fc->ops = &rpc_fs_context_ops; return 0; } static void rpc_kill_sb(struct super_block *sb) { struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_lock(&sn->pipefs_sb_lock); if (sn->pipefs_sb != sb) { mutex_unlock(&sn->pipefs_sb_lock); goto out; } sn->pipefs_sb = NULL; dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n", net->ns.inum, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); mutex_unlock(&sn->pipefs_sb_lock); out: kill_litter_super(sb); put_net(net); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", .init_fs_context = rpc_init_fs_context, .kill_sb = rpc_kill_sb, }; MODULE_ALIAS_FS("rpc_pipefs"); MODULE_ALIAS("rpc_pipefs"); static void init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->pipe = NULL; init_waitqueue_head(&rpci->waitq); } int register_rpc_pipefs(void) { int err; rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); if (!rpc_inode_cachep) return -ENOMEM; err = rpc_clients_notifier_register(); if (err) goto err_notifier; err = register_filesystem(&rpc_pipe_fs_type); if (err) goto err_register; return 0; err_register: rpc_clients_notifier_unregister(); err_notifier: kmem_cache_destroy(rpc_inode_cachep); return err; } void unregister_rpc_pipefs(void) { rpc_clients_notifier_unregister(); unregister_filesystem(&rpc_pipe_fs_type); kmem_cache_destroy(rpc_inode_cachep); } |
2 1 1 7 5 4 1 2 5 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 | // SPDX-License-Identifier: GPL-2.0-only /* * Media entity * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/bitmap.h> #include <linux/list.h> #include <linux/property.h> #include <linux/slab.h> #include <media/media-entity.h> #include <media/media-device.h> static inline const char *intf_type(struct media_interface *intf) { switch (intf->type) { case MEDIA_INTF_T_DVB_FE: return "dvb-frontend"; case MEDIA_INTF_T_DVB_DEMUX: return "dvb-demux"; case MEDIA_INTF_T_DVB_DVR: return "dvb-dvr"; case MEDIA_INTF_T_DVB_CA: return "dvb-ca"; case MEDIA_INTF_T_DVB_NET: return "dvb-net"; case MEDIA_INTF_T_V4L_VIDEO: return "v4l-video"; case MEDIA_INTF_T_V4L_VBI: return "v4l-vbi"; case MEDIA_INTF_T_V4L_RADIO: return "v4l-radio"; case MEDIA_INTF_T_V4L_SUBDEV: return "v4l-subdev"; case MEDIA_INTF_T_V4L_SWRADIO: return "v4l-swradio"; case MEDIA_INTF_T_V4L_TOUCH: return "v4l-touch"; default: return "unknown-intf"; } }; static inline const char *link_type_name(struct media_link *link) { switch (link->flags & MEDIA_LNK_FL_LINK_TYPE) { case MEDIA_LNK_FL_DATA_LINK: return "data"; case MEDIA_LNK_FL_INTERFACE_LINK: return "interface"; case MEDIA_LNK_FL_ANCILLARY_LINK: return "ancillary"; default: return "unknown"; } } __must_check int media_entity_enum_init(struct media_entity_enum *ent_enum, struct media_device *mdev) { int idx_max; idx_max = ALIGN(mdev->entity_internal_idx_max + 1, BITS_PER_LONG); ent_enum->bmap = bitmap_zalloc(idx_max, GFP_KERNEL); if (!ent_enum->bmap) return -ENOMEM; ent_enum->idx_max = idx_max; return 0; } EXPORT_SYMBOL_GPL(media_entity_enum_init); void media_entity_enum_cleanup(struct media_entity_enum *ent_enum) { bitmap_free(ent_enum->bmap); } EXPORT_SYMBOL_GPL(media_entity_enum_cleanup); /** * dev_dbg_obj - Prints in debug mode a change on some object * * @event_name: Name of the event to report. Could be __func__ * @gobj: Pointer to the object * * Enabled only if DEBUG or CONFIG_DYNAMIC_DEBUG. Otherwise, it * won't produce any code. */ static void dev_dbg_obj(const char *event_name, struct media_gobj *gobj) { #if defined(DEBUG) || defined (CONFIG_DYNAMIC_DEBUG) switch (media_type(gobj)) { case MEDIA_GRAPH_ENTITY: dev_dbg(gobj->mdev->dev, "%s id %u: entity '%s'\n", event_name, media_id(gobj), gobj_to_entity(gobj)->name); break; case MEDIA_GRAPH_LINK: { struct media_link *link = gobj_to_link(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s link id %u ==> id %u\n", event_name, media_id(gobj), link_type_name(link), media_id(link->gobj0), media_id(link->gobj1)); break; } case MEDIA_GRAPH_PAD: { struct media_pad *pad = gobj_to_pad(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s%spad '%s':%d\n", event_name, media_id(gobj), pad->flags & MEDIA_PAD_FL_SINK ? "sink " : "", pad->flags & MEDIA_PAD_FL_SOURCE ? "source " : "", pad->entity->name, pad->index); break; } case MEDIA_GRAPH_INTF_DEVNODE: { struct media_interface *intf = gobj_to_intf(gobj); struct media_intf_devnode *devnode = intf_to_devnode(intf); dev_dbg(gobj->mdev->dev, "%s id %u: intf_devnode %s - major: %d, minor: %d\n", event_name, media_id(gobj), intf_type(intf), devnode->major, devnode->minor); break; } } #endif } void media_gobj_create(struct media_device *mdev, enum media_gobj_type type, struct media_gobj *gobj) { BUG_ON(!mdev); gobj->mdev = mdev; /* Create a per-type unique object ID */ gobj->id = media_gobj_gen_id(type, ++mdev->id); switch (type) { case MEDIA_GRAPH_ENTITY: list_add_tail(&gobj->list, &mdev->entities); break; case MEDIA_GRAPH_PAD: list_add_tail(&gobj->list, &mdev->pads); break; case MEDIA_GRAPH_LINK: list_add_tail(&gobj->list, &mdev->links); break; case MEDIA_GRAPH_INTF_DEVNODE: list_add_tail(&gobj->list, &mdev->interfaces); break; } mdev->topology_version++; dev_dbg_obj(__func__, gobj); } void media_gobj_destroy(struct media_gobj *gobj) { /* Do nothing if the object is not linked. */ if (gobj->mdev == NULL) return; dev_dbg_obj(__func__, gobj); gobj->mdev->topology_version++; /* Remove the object from mdev list */ list_del(&gobj->list); gobj->mdev = NULL; } /* * TODO: Get rid of this. */ #define MEDIA_ENTITY_MAX_PADS 512 int media_entity_pads_init(struct media_entity *entity, u16 num_pads, struct media_pad *pads) { struct media_device *mdev = entity->graph_obj.mdev; struct media_pad *iter; unsigned int i = 0; int ret = 0; if (num_pads >= MEDIA_ENTITY_MAX_PADS) return -E2BIG; entity->num_pads = num_pads; entity->pads = pads; if (mdev) mutex_lock(&mdev->graph_mutex); media_entity_for_each_pad(entity, iter) { iter->entity = entity; iter->index = i++; if (hweight32(iter->flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) != 1) { ret = -EINVAL; break; } if (mdev) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &iter->graph_obj); } if (ret && mdev) { media_entity_for_each_pad(entity, iter) media_gobj_destroy(&iter->graph_obj); } if (mdev) mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_entity_pads_init); /* ----------------------------------------------------------------------------- * Graph traversal */ /** * media_entity_has_pad_interdep - Check interdependency between two pads * * @entity: The entity * @pad0: The first pad index * @pad1: The second pad index * * This function checks the interdependency inside the entity between @pad0 * and @pad1. If two pads are interdependent they are part of the same pipeline * and enabling one of the pads means that the other pad will become "locked" * and doesn't allow configuration changes. * * This function uses the &media_entity_operations.has_pad_interdep() operation * to check the dependency inside the entity between @pad0 and @pad1. If the * has_pad_interdep operation is not implemented, all pads of the entity are * considered to be interdependent. * * One of @pad0 and @pad1 must be a sink pad and the other one a source pad. * The function returns false if both pads are sinks or sources. * * The caller must hold entity->graph_obj.mdev->mutex. * * Return: true if the pads are connected internally and false otherwise. */ static bool media_entity_has_pad_interdep(struct media_entity *entity, unsigned int pad0, unsigned int pad1) { if (pad0 >= entity->num_pads || pad1 >= entity->num_pads) return false; if (entity->pads[pad0].flags & entity->pads[pad1].flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) return false; if (!entity->ops || !entity->ops->has_pad_interdep) return true; return entity->ops->has_pad_interdep(entity, pad0, pad1); } static struct media_entity * media_entity_other(struct media_entity *entity, struct media_link *link) { if (link->source->entity == entity) return link->sink->entity; else return link->source->entity; } /* push an entity to traversal stack */ static void stack_push(struct media_graph *graph, struct media_entity *entity) { if (graph->top == MEDIA_ENTITY_ENUM_MAX_DEPTH - 1) { WARN_ON(1); return; } graph->top++; graph->stack[graph->top].link = entity->links.next; graph->stack[graph->top].entity = entity; } static struct media_entity *stack_pop(struct media_graph *graph) { struct media_entity *entity; entity = graph->stack[graph->top].entity; graph->top--; return entity; } #define link_top(en) ((en)->stack[(en)->top].link) #define stack_top(en) ((en)->stack[(en)->top].entity) /** * media_graph_walk_init - Allocate resources for graph walk * @graph: Media graph structure that will be used to walk the graph * @mdev: Media device * * Reserve resources for graph walk in media device's current * state. The memory must be released using * media_graph_walk_cleanup(). * * Returns error on failure, zero on success. */ __must_check int media_graph_walk_init( struct media_graph *graph, struct media_device *mdev) { return media_entity_enum_init(&graph->ent_enum, mdev); } EXPORT_SYMBOL_GPL(media_graph_walk_init); /** * media_graph_walk_cleanup - Release resources related to graph walking * @graph: Media graph structure that was used to walk the graph */ void media_graph_walk_cleanup(struct media_graph *graph) { media_entity_enum_cleanup(&graph->ent_enum); } EXPORT_SYMBOL_GPL(media_graph_walk_cleanup); void media_graph_walk_start(struct media_graph *graph, struct media_entity *entity) { media_entity_enum_zero(&graph->ent_enum); media_entity_enum_set(&graph->ent_enum, entity); graph->top = 0; graph->stack[graph->top].entity = NULL; stack_push(graph, entity); dev_dbg(entity->graph_obj.mdev->dev, "begin graph walk at '%s'\n", entity->name); } EXPORT_SYMBOL_GPL(media_graph_walk_start); static void media_graph_walk_iter(struct media_graph *graph) { struct media_entity *entity = stack_top(graph); struct media_link *link; struct media_entity *next; link = list_entry(link_top(graph), typeof(*link), list); /* If the link is not a data link, don't follow it */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) != MEDIA_LNK_FL_DATA_LINK) { link_top(graph) = link_top(graph)->next; return; } /* The link is not enabled so we do not follow. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping disabled link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); return; } /* Get the entity at the other end of the link. */ next = media_entity_other(entity, link); /* Has the entity already been visited? */ if (media_entity_enum_test_and_set(&graph->ent_enum, next)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping entity '%s' (already seen)\n", next->name); return; } /* Push the new entity to stack and start over. */ link_top(graph) = link_top(graph)->next; stack_push(graph, next); dev_dbg(entity->graph_obj.mdev->dev, "walk: pushing '%s' on stack\n", next->name); lockdep_assert_held(&entity->graph_obj.mdev->graph_mutex); } struct media_entity *media_graph_walk_next(struct media_graph *graph) { struct media_entity *entity; if (stack_top(graph) == NULL) return NULL; /* * Depth first search. Push entity to stack and continue from * top of the stack until no more entities on the level can be * found. */ while (link_top(graph) != &stack_top(graph)->links) media_graph_walk_iter(graph); entity = stack_pop(graph); dev_dbg(entity->graph_obj.mdev->dev, "walk: returning entity '%s'\n", entity->name); return entity; } EXPORT_SYMBOL_GPL(media_graph_walk_next); /* ----------------------------------------------------------------------------- * Pipeline management */ /* * The pipeline traversal stack stores pads that are reached during graph * traversal, with a list of links to be visited to continue the traversal. * When a new pad is reached, an entry is pushed on the top of the stack and * points to the incoming pad and the first link of the entity. * * To find further pads in the pipeline, the traversal algorithm follows * internal pad dependencies in the entity, and then links in the graph. It * does so by iterating over all links of the entity, and following enabled * links that originate from a pad that is internally connected to the incoming * pad, as reported by the media_entity_has_pad_interdep() function. */ /** * struct media_pipeline_walk_entry - Entry in the pipeline traversal stack * * @pad: The media pad being visited * @links: Links left to be visited */ struct media_pipeline_walk_entry { struct media_pad *pad; struct list_head *links; }; /** * struct media_pipeline_walk - State used by the media pipeline traversal * algorithm * * @mdev: The media device * @stack: Depth-first search stack * @stack.size: Number of allocated entries in @stack.entries * @stack.top: Index of the top stack entry (-1 if the stack is empty) * @stack.entries: Stack entries */ struct media_pipeline_walk { struct media_device *mdev; struct { unsigned int size; int top; struct media_pipeline_walk_entry *entries; } stack; }; #define MEDIA_PIPELINE_STACK_GROW_STEP 16 static struct media_pipeline_walk_entry * media_pipeline_walk_top(struct media_pipeline_walk *walk) { return &walk->stack.entries[walk->stack.top]; } static bool media_pipeline_walk_empty(struct media_pipeline_walk *walk) { return walk->stack.top == -1; } /* Increase the stack size by MEDIA_PIPELINE_STACK_GROW_STEP elements. */ static int media_pipeline_walk_resize(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entries; unsigned int new_size; /* Safety check, to avoid stack overflows in case of bugs. */ if (walk->stack.size >= 256) return -E2BIG; new_size = walk->stack.size + MEDIA_PIPELINE_STACK_GROW_STEP; entries = krealloc(walk->stack.entries, new_size * sizeof(*walk->stack.entries), GFP_KERNEL); if (!entries) return -ENOMEM; walk->stack.entries = entries; walk->stack.size = new_size; return 0; } /* Push a new entry on the stack. */ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, struct media_pad *pad) { struct media_pipeline_walk_entry *entry; int ret; if (walk->stack.top + 1 >= walk->stack.size) { ret = media_pipeline_walk_resize(walk); if (ret) return ret; } walk->stack.top++; entry = media_pipeline_walk_top(walk); entry->pad = pad; entry->links = pad->entity->links.next; dev_dbg(walk->mdev->dev, "media pipeline: pushed entry %u: '%s':%u\n", walk->stack.top, pad->entity->name, pad->index); return 0; } /* * Move the top entry link cursor to the next link. If all links of the entry * have been visited, pop the entry itself. */ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) return; entry = media_pipeline_walk_top(walk); if (entry->links->next == &entry->pad->entity->links) { dev_dbg(walk->mdev->dev, "media pipeline: entry %u has no more links, popping\n", walk->stack.top); walk->stack.top--; return; } entry->links = entry->links->next; dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); } /* Free all memory allocated while walking the pipeline. */ static void media_pipeline_walk_destroy(struct media_pipeline_walk *walk) { kfree(walk->stack.entries); } /* Add a pad to the pipeline and push it to the stack. */ static int media_pipeline_add_pad(struct media_pipeline *pipe, struct media_pipeline_walk *walk, struct media_pad *pad) { struct media_pipeline_pad *ppad; list_for_each_entry(ppad, &pipe->pads, list) { if (ppad->pad == pad) { dev_dbg(pad->graph_obj.mdev->dev, "media pipeline: already contains pad '%s':%u\n", pad->entity->name, pad->index); return 0; } } ppad = kzalloc(sizeof(*ppad), GFP_KERNEL); if (!ppad) return -ENOMEM; ppad->pipe = pipe; ppad->pad = pad; list_add_tail(&ppad->list, &pipe->pads); dev_dbg(pad->graph_obj.mdev->dev, "media pipeline: added pad '%s':%u\n", pad->entity->name, pad->index); return media_pipeline_walk_push(walk, pad); } /* Explore the next link of the entity at the top of the stack. */ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk); struct media_pad *pad; struct media_link *link; struct media_pad *local; struct media_pad *remote; int ret; pad = entry->pad; link = list_entry(entry->links, typeof(*link), list); media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); /* Skip links that are not enabled. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (disabled)\n"); return 0; } /* Get the local pad and remote pad. */ if (link->source->entity == pad->entity) { local = link->source; remote = link->sink; } else { local = link->sink; remote = link->source; } /* * Skip links that originate from a different pad than the incoming pad * that is not connected internally in the entity to the incoming pad. */ if (pad != local && !media_entity_has_pad_interdep(pad->entity, pad->index, local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); return 0; } /* * Add the local and remote pads of the link to the pipeline and push * them to the stack, if they're not already present. */ ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; return 0; } static void media_pipeline_cleanup(struct media_pipeline *pipe) { while (!list_empty(&pipe->pads)) { struct media_pipeline_pad *ppad; ppad = list_first_entry(&pipe->pads, typeof(*ppad), list); list_del(&ppad->list); kfree(ppad); } } static int media_pipeline_populate(struct media_pipeline *pipe, struct media_pad *pad) { struct media_pipeline_walk walk = { }; struct media_pipeline_pad *ppad; int ret; /* * Populate the media pipeline by walking the media graph, starting * from @pad. */ INIT_LIST_HEAD(&pipe->pads); pipe->mdev = pad->graph_obj.mdev; walk.mdev = pipe->mdev; walk.stack.top = -1; ret = media_pipeline_add_pad(pipe, &walk, pad); if (ret) goto done; /* * Use a depth-first search algorithm: as long as the stack is not * empty, explore the next link of the top entry. The * media_pipeline_explore_next_link() function will either move to the * next link, pop the entry if fully visited, or add new entries on * top. */ while (!media_pipeline_walk_empty(&walk)) { ret = media_pipeline_explore_next_link(pipe, &walk); if (ret) goto done; } dev_dbg(pad->graph_obj.mdev->dev, "media pipeline populated, found pads:\n"); list_for_each_entry(ppad, &pipe->pads, list) dev_dbg(pad->graph_obj.mdev->dev, "- '%s':%u\n", ppad->pad->entity->name, ppad->pad->index); WARN_ON(walk.stack.top != -1); ret = 0; done: media_pipeline_walk_destroy(&walk); if (ret) media_pipeline_cleanup(pipe); return ret; } __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe) { struct media_device *mdev = pad->graph_obj.mdev; struct media_pipeline_pad *err_ppad; struct media_pipeline_pad *ppad; int ret; lockdep_assert_held(&mdev->graph_mutex); /* * If the pad is already part of a pipeline, that pipeline must be the * same as the pipe given to media_pipeline_start(). */ if (WARN_ON(pad->pipe && pad->pipe != pipe)) return -EINVAL; /* * If the pipeline has already been started, it is guaranteed to be * valid, so just increase the start count. */ if (pipe->start_count) { pipe->start_count++; return 0; } /* * Populate the pipeline. This populates the media_pipeline pads list * with media_pipeline_pad instances for each pad found during graph * walk. */ ret = media_pipeline_populate(pipe, pad); if (ret) return ret; /* * Now that all the pads in the pipeline have been gathered, perform * the validation steps. */ list_for_each_entry(ppad, &pipe->pads, list) { struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; bool has_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, pad->index); /* * 1. Ensure that the pad doesn't already belong to a different * pipeline. */ if (pad->pipe) { dev_dbg(mdev->dev, "Failed to start pipeline: pad '%s':%u busy\n", pad->entity->name, pad->index); ret = -EBUSY; goto error; } /* * 2. Validate all active links whose sink is the current pad. * Validation of the source pads is performed in the context of * the connected sink pad to avoid duplicating checks. */ for_each_media_entity_data_link(entity, link) { /* Skip links unrelated to the current pad. */ if (link->sink != pad && link->source != pad) continue; /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; has_link = true; /* * Validate the link if it's enabled and has the * current pad as its sink. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->sink != pad) continue; if (!entity->ops || !entity->ops->link_validate) continue; ret = entity->ops->link_validate(link); if (ret) { dev_dbg(mdev->dev, "Link '%s':%u -> '%s':%u failed validation: %d\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index, ret); goto error; } dev_dbg(mdev->dev, "Link '%s':%u -> '%s':%u is valid\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); } /* * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", pad->entity->name, pad->index); ret = -ENOLINK; goto error; } /* Validation passed, store the pipe pointer in the pad. */ pad->pipe = pipe; } pipe->start_count++; return 0; error: /* * Link validation on graph failed. We revert what we did and * return the error. */ list_for_each_entry(err_ppad, &pipe->pads, list) { if (err_ppad == ppad) break; err_ppad->pad->pipe = NULL; } media_pipeline_cleanup(pipe); return ret; } EXPORT_SYMBOL_GPL(__media_pipeline_start); __must_check int media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe) { struct media_device *mdev = pad->graph_obj.mdev; int ret; mutex_lock(&mdev->graph_mutex); ret = __media_pipeline_start(pad, pipe); mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_pipeline_start); void __media_pipeline_stop(struct media_pad *pad) { struct media_pipeline *pipe = pad->pipe; struct media_pipeline_pad *ppad; /* * If the following check fails, the driver has performed an * unbalanced call to media_pipeline_stop() */ if (WARN_ON(!pipe)) return; if (--pipe->start_count) return; list_for_each_entry(ppad, &pipe->pads, list) ppad->pad->pipe = NULL; media_pipeline_cleanup(pipe); if (pipe->allocated) kfree(pipe); } EXPORT_SYMBOL_GPL(__media_pipeline_stop); void media_pipeline_stop(struct media_pad *pad) { struct media_device *mdev = pad->graph_obj.mdev; mutex_lock(&mdev->graph_mutex); __media_pipeline_stop(pad); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_pipeline_stop); __must_check int media_pipeline_alloc_start(struct media_pad *pad) { struct media_device *mdev = pad->graph_obj.mdev; struct media_pipeline *new_pipe = NULL; struct media_pipeline *pipe; int ret; mutex_lock(&mdev->graph_mutex); /* * Is the pad already part of a pipeline? If not, we need to allocate * a pipe. */ pipe = media_pad_pipeline(pad); if (!pipe) { new_pipe = kzalloc(sizeof(*new_pipe), GFP_KERNEL); if (!new_pipe) { ret = -ENOMEM; goto out; } pipe = new_pipe; pipe->allocated = true; } ret = __media_pipeline_start(pad, pipe); if (ret) kfree(new_pipe); out: mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_pipeline_alloc_start); struct media_pad * __media_pipeline_pad_iter_next(struct media_pipeline *pipe, struct media_pipeline_pad_iter *iter, struct media_pad *pad) { if (!pad) iter->cursor = pipe->pads.next; if (iter->cursor == &pipe->pads) return NULL; pad = list_entry(iter->cursor, struct media_pipeline_pad, list)->pad; iter->cursor = iter->cursor->next; return pad; } EXPORT_SYMBOL_GPL(__media_pipeline_pad_iter_next); int media_pipeline_entity_iter_init(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter) { return media_entity_enum_init(&iter->ent_enum, pipe->mdev); } EXPORT_SYMBOL_GPL(media_pipeline_entity_iter_init); void media_pipeline_entity_iter_cleanup(struct media_pipeline_entity_iter *iter) { media_entity_enum_cleanup(&iter->ent_enum); } EXPORT_SYMBOL_GPL(media_pipeline_entity_iter_cleanup); struct media_entity * __media_pipeline_entity_iter_next(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter, struct media_entity *entity) { if (!entity) iter->cursor = pipe->pads.next; while (iter->cursor != &pipe->pads) { struct media_pipeline_pad *ppad; struct media_entity *entity; ppad = list_entry(iter->cursor, struct media_pipeline_pad, list); entity = ppad->pad->entity; iter->cursor = iter->cursor->next; if (!media_entity_enum_test_and_set(&iter->ent_enum, entity)) return entity; } return NULL; } EXPORT_SYMBOL_GPL(__media_pipeline_entity_iter_next); /* ----------------------------------------------------------------------------- * Links management */ static struct media_link *media_add_link(struct list_head *head) { struct media_link *link; link = kzalloc(sizeof(*link), GFP_KERNEL); if (link == NULL) return NULL; list_add_tail(&link->list, head); return link; } static void __media_entity_remove_link(struct media_entity *entity, struct media_link *link) { struct media_link *rlink, *tmp; struct media_entity *remote; /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { if (link->source->entity == entity) remote = link->sink->entity; else remote = link->source->entity; list_for_each_entry_safe(rlink, tmp, &remote->links, list) { if (rlink != link->reverse) continue; if (link->source->entity == entity) remote->num_backlinks--; /* Remove the remote link */ list_del(&rlink->list); media_gobj_destroy(&rlink->graph_obj); kfree(rlink); if (--remote->num_links == 0) break; } } list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } int media_get_pad_index(struct media_entity *entity, u32 pad_type, enum media_pad_signal_type sig_type) { unsigned int i; if (!entity) return -EINVAL; for (i = 0; i < entity->num_pads; i++) { if ((entity->pads[i].flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) != pad_type) continue; if (entity->pads[i].sig_type == sig_type) return i; } return -EINVAL; } EXPORT_SYMBOL_GPL(media_get_pad_index); int media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_entity *sink, u16 sink_pad, u32 flags) { struct media_link *link; struct media_link *backlink; if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) return -EINVAL; if (WARN_ON(!(source->pads[source_pad].flags & MEDIA_PAD_FL_SOURCE))) return -EINVAL; if (WARN_ON(!(sink->pads[sink_pad].flags & MEDIA_PAD_FL_SINK))) return -EINVAL; link = media_add_link(&source->links); if (link == NULL) return -ENOMEM; link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); /* Create the backlink. Backlinks are used to help graph traversal and * are not reported to userspace. */ backlink = media_add_link(&sink->links); if (backlink == NULL) { __media_entity_remove_link(source, link); return -ENOMEM; } backlink->source = &source->pads[source_pad]; backlink->sink = &sink->pads[sink_pad]; backlink->flags = flags; backlink->is_backlink = true; /* Initialize graph object embedded at the new link */ media_gobj_create(sink->graph_obj.mdev, MEDIA_GRAPH_LINK, &backlink->graph_obj); link->reverse = backlink; backlink->reverse = link; sink->num_backlinks++; sink->num_links++; source->num_links++; return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); int media_create_pad_links(const struct media_device *mdev, const u32 source_function, struct media_entity *source, const u16 source_pad, const u32 sink_function, struct media_entity *sink, const u16 sink_pad, u32 flags, const bool allow_both_undefined) { struct media_entity *entity; unsigned function; int ret; /* Trivial case: 1:1 relation */ if (source && sink) return media_create_pad_link(source, source_pad, sink, sink_pad, flags); /* Worse case scenario: n:n relation */ if (!source && !sink) { if (!allow_both_undefined) return 0; media_device_for_each_entity(source, mdev) { if (source->function != source_function) continue; media_device_for_each_entity(sink, mdev) { if (sink->function != sink_function) continue; ret = media_create_pad_link(source, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } } return 0; } /* Handle 1:n and n:1 cases */ if (source) function = sink_function; else function = source_function; media_device_for_each_entity(entity, mdev) { if (entity->function != function) continue; if (source) ret = media_create_pad_link(source, source_pad, entity, sink_pad, flags); else ret = media_create_pad_link(entity, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } return 0; } EXPORT_SYMBOL_GPL(media_create_pad_links); void __media_entity_remove_links(struct media_entity *entity) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &entity->links, list) __media_entity_remove_link(entity, link); entity->num_links = 0; entity->num_backlinks = 0; } EXPORT_SYMBOL_GPL(__media_entity_remove_links); void media_entity_remove_links(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; /* Do nothing if the entity is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_entity_remove_links(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_entity_remove_links); static int __media_entity_setup_link_notify(struct media_link *link, u32 flags) { int ret; /* Notify both entities. */ ret = media_entity_call(link->source->entity, link_setup, link->source, link->sink, flags); if (ret < 0 && ret != -ENOIOCTLCMD) return ret; ret = media_entity_call(link->sink->entity, link_setup, link->sink, link->source, flags); if (ret < 0 && ret != -ENOIOCTLCMD) { media_entity_call(link->source->entity, link_setup, link->source, link->sink, link->flags); return ret; } link->flags = flags; link->reverse->flags = link->flags; return 0; } int __media_entity_setup_link(struct media_link *link, u32 flags) { const u32 mask = MEDIA_LNK_FL_ENABLED; struct media_device *mdev; struct media_pad *source, *sink; int ret = -EBUSY; if (link == NULL) return -EINVAL; /* The non-modifiable link flags must not be modified. */ if ((link->flags & ~mask) != (flags & ~mask)) return -EINVAL; if (link->flags & MEDIA_LNK_FL_IMMUTABLE) return link->flags == flags ? 0 : -EINVAL; if (link->flags == flags) return 0; source = link->source; sink = link->sink; if (!(link->flags & MEDIA_LNK_FL_DYNAMIC) && (media_pad_is_streaming(source) || media_pad_is_streaming(sink))) return -EBUSY; mdev = source->graph_obj.mdev; if (mdev->ops && mdev->ops->link_notify) { ret = mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_PRE_LINK_CH); if (ret < 0) return ret; } ret = __media_entity_setup_link_notify(link, flags); if (mdev->ops && mdev->ops->link_notify) mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_POST_LINK_CH); return ret; } EXPORT_SYMBOL_GPL(__media_entity_setup_link); int media_entity_setup_link(struct media_link *link, u32 flags) { int ret; mutex_lock(&link->graph_obj.mdev->graph_mutex); ret = __media_entity_setup_link(link, flags); mutex_unlock(&link->graph_obj.mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_entity_setup_link); struct media_link * media_entity_find_link(struct media_pad *source, struct media_pad *sink) { struct media_link *link; for_each_media_entity_data_link(source->entity, link) { if (link->source->entity == source->entity && link->source->index == source->index && link->sink->entity == sink->entity && link->sink->index == sink->index) return link; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_find_link); struct media_pad *media_pad_remote_pad_first(const struct media_pad *pad) { struct media_link *link; for_each_media_entity_data_link(pad->entity, link) { if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->source == pad) return link->sink; if (link->sink == pad) return link->source; } return NULL; } EXPORT_SYMBOL_GPL(media_pad_remote_pad_first); struct media_pad * media_entity_remote_pad_unique(const struct media_entity *entity, unsigned int type) { struct media_pad *pad = NULL; struct media_link *link; list_for_each_entry(link, &entity->links, list) { struct media_pad *local_pad; struct media_pad *remote_pad; if (((link->flags & MEDIA_LNK_FL_LINK_TYPE) != MEDIA_LNK_FL_DATA_LINK) || !(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (type == MEDIA_PAD_FL_SOURCE) { local_pad = link->sink; remote_pad = link->source; } else { local_pad = link->source; remote_pad = link->sink; } if (local_pad->entity == entity) { if (pad) return ERR_PTR(-ENOTUNIQ); pad = remote_pad; } } if (!pad) return ERR_PTR(-ENOLINK); return pad; } EXPORT_SYMBOL_GPL(media_entity_remote_pad_unique); struct media_pad *media_pad_remote_pad_unique(const struct media_pad *pad) { struct media_pad *found_pad = NULL; struct media_link *link; list_for_each_entry(link, &pad->entity->links, list) { struct media_pad *remote_pad; if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->sink == pad) remote_pad = link->source; else if (link->source == pad) remote_pad = link->sink; else continue; if (found_pad) return ERR_PTR(-ENOTUNIQ); found_pad = remote_pad; } if (!found_pad) return ERR_PTR(-ENOLINK); return found_pad; } EXPORT_SYMBOL_GPL(media_pad_remote_pad_unique); int media_entity_get_fwnode_pad(struct media_entity *entity, const struct fwnode_handle *fwnode, unsigned long direction_flags) { struct fwnode_endpoint endpoint; unsigned int i; int ret; if (!entity->ops || !entity->ops->get_fwnode_pad) { for (i = 0; i < entity->num_pads; i++) { if (entity->pads[i].flags & direction_flags) return i; } return -ENXIO; } ret = fwnode_graph_parse_endpoint(fwnode, &endpoint); if (ret) return ret; ret = entity->ops->get_fwnode_pad(entity, &endpoint); if (ret < 0) return ret; if (ret >= entity->num_pads) return -ENXIO; if (!(entity->pads[ret].flags & direction_flags)) return -ENXIO; return ret; } EXPORT_SYMBOL_GPL(media_entity_get_fwnode_pad); struct media_pipeline *media_entity_pipeline(struct media_entity *entity) { struct media_pad *pad; media_entity_for_each_pad(entity, pad) { if (pad->pipe) return pad->pipe; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_pipeline); struct media_pipeline *media_pad_pipeline(struct media_pad *pad) { return pad->pipe; } EXPORT_SYMBOL_GPL(media_pad_pipeline); static void media_interface_init(struct media_device *mdev, struct media_interface *intf, u32 gobj_type, u32 intf_type, u32 flags) { intf->type = intf_type; intf->flags = flags; INIT_LIST_HEAD(&intf->links); media_gobj_create(mdev, gobj_type, &intf->graph_obj); } /* Functions related to the media interface via device nodes */ struct media_intf_devnode *media_devnode_create(struct media_device *mdev, u32 type, u32 flags, u32 major, u32 minor) { struct media_intf_devnode *devnode; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return NULL; devnode->major = major; devnode->minor = minor; media_interface_init(mdev, &devnode->intf, MEDIA_GRAPH_INTF_DEVNODE, type, flags); return devnode; } EXPORT_SYMBOL_GPL(media_devnode_create); void media_devnode_remove(struct media_intf_devnode *devnode) { media_remove_intf_links(&devnode->intf); media_gobj_destroy(&devnode->intf.graph_obj); kfree(devnode); } EXPORT_SYMBOL_GPL(media_devnode_remove); struct media_link *media_create_intf_link(struct media_entity *entity, struct media_interface *intf, u32 flags) { struct media_link *link; link = media_add_link(&intf->links); if (link == NULL) return NULL; link->intf = intf; link->entity = entity; link->flags = flags | MEDIA_LNK_FL_INTERFACE_LINK; /* Initialize graph object embedded at the new link */ media_gobj_create(intf->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); return link; } EXPORT_SYMBOL_GPL(media_create_intf_link); void __media_remove_intf_link(struct media_link *link) { list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_link); void media_remove_intf_link(struct media_link *link) { struct media_device *mdev = link->graph_obj.mdev; /* Do nothing if the intf is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_link(link); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_link); void __media_remove_intf_links(struct media_interface *intf) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &intf->links, list) __media_remove_intf_link(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_links); void media_remove_intf_links(struct media_interface *intf) { struct media_device *mdev = intf->graph_obj.mdev; /* Do nothing if the intf is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_links(intf); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_links); struct media_link *media_create_ancillary_link(struct media_entity *primary, struct media_entity *ancillary) { struct media_link *link; link = media_add_link(&primary->links); if (!link) return ERR_PTR(-ENOMEM); link->gobj0 = &primary->graph_obj; link->gobj1 = &ancillary->graph_obj; link->flags = MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_ANCILLARY_LINK; /* Initialize graph object embedded in the new link */ media_gobj_create(primary->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); return link; } EXPORT_SYMBOL_GPL(media_create_ancillary_link); struct media_link *__media_entity_next_link(struct media_entity *entity, struct media_link *link, unsigned long link_type) { link = link ? list_next_entry(link, list) : list_first_entry(&entity->links, typeof(*link), list); list_for_each_entry_from(link, &entity->links, list) if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == link_type) return link; return NULL; } EXPORT_SYMBOL_GPL(__media_entity_next_link); |
23 67 67 18 18 2 17 17 17 12 6 6 2 78 78 78 4 74 4 74 78 3 75 4 74 29 93 78 73 1 4 1 4 78 4 74 56 22 67 19 22 25 3 67 9 67 67 67 67 14 18 9 3 3 5 4 4 1 28 2 26 25 2 23 2 2 4 1 1 2 60 25 25 18 7 101 1 33 1 5 69 67 66 66 4 2 2 1 1 104 104 3 3 3 2 1 3 3 67 78 78 78 78 47 32 18 17 3 67 1 1 26 26 4 26 26 23 1 24 1 1 48 18 30 28 23 5 4 24 23 5 25 3 26 2 25 3 3 25 24 3 1 25 3 21 7 25 3 23 5 24 4 24 4 22 6 26 2 22 28 27 28 27 1 28 28 28 2 30 30 59 59 58 67 67 67 67 67 67 8 67 10 10 60 61 1 60 60 2 58 59 59 58 1 59 59 59 1 65 2 2 3 59 2 3 71 7 65 6 4 2 1 61 65 9 7 65 107 99 8 18 18 18 18 8 18 18 25 21 21 21 4 4 4 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 | /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. */ #include "fuse_i.h" #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/exportfs.h> #include <linux/posix_acl.h> #include <linux/pid_namespace.h> #include <uapi/linux/magic.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, &max_user_bgreq, 0644); __MODULE_PARM_TYPE(max_user_bgreq, "uint"); MODULE_PARM_DESC(max_user_bgreq, "Global limit for the maximum number of backgrounded requests an " "unprivileged user can set"); unsigned max_user_congthresh; module_param_call(max_user_congthresh, set_global_limit, param_get_uint, &max_user_congthresh, 0644); __MODULE_PARM_TYPE(max_user_congthresh, "uint"); MODULE_PARM_DESC(max_user_congthresh, "Global limit for the maximum congestion threshold an " "unprivileged user can set"); #define FUSE_DEFAULT_BLKSIZE 512 /** Maximum number of outstanding background requests */ #define FUSE_DEFAULT_MAX_BACKGROUND 12 /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) #ifdef CONFIG_BLOCK static struct file_system_type fuseblk_fs_type; #endif struct fuse_forget_link *fuse_alloc_forget(void) { return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) { struct fuse_submount_lookup *sl; sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); if (!sl) return NULL; sl->forget = fuse_alloc_forget(); if (!sl->forget) goto out_free; return sl; out_free: kfree(sl); return NULL; } static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL); if (!fi) return NULL; fi->i_time = 0; fi->inval_mask = ~0; fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; fi->orig_ino = 0; fi->state = 0; fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); if (!fi->forget) goto out_free; if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) goto out_free_forget; return &fi->inode; out_free_forget: kfree(fi->forget); out_free: kmem_cache_free(fuse_inode_cachep, fi); return NULL; } static void fuse_free_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); mutex_destroy(&fi->mutex); kfree(fi->forget); #ifdef CONFIG_FUSE_DAX kfree(fi->dax); #endif kmem_cache_free(fuse_inode_cachep, fi); } static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, struct fuse_submount_lookup *sl) { if (!refcount_dec_and_test(&sl->count)) return; fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); sl->forget = NULL; kfree(sl); } static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); if (FUSE_IS_DAX(inode)) fuse_dax_inode_cleanup(inode); if (fi->nlookup) { fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); fi->forget = NULL; } if (fi->submount_lookup) { fuse_cleanup_submount_lookup(fc, fi->submount_lookup); fi->submount_lookup = NULL; } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } } static int fuse_reconfigure(struct fs_context *fsc) { struct super_block *sb = fsc->root->d_sb; sync_filesystem(sb); if (fsc->sb_flags & SB_MANDLOCK) return -EINVAL; return 0; } /* * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down * so that it will fit. */ static ino_t fuse_squash_ino(u64 ino64) { ino_t ino = (ino_t) ino64; if (sizeof(ino_t) < sizeof(u64)) ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; return ino; } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); lockdep_assert_held(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; /* Clear basic stats from invalid mask */ set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); inode->i_uid = make_kuid(fc->user_ns, attr->uid); inode->i_gid = make_kgid(fc->user_ns, attr->gid); inode->i_blocks = attr->blocks; /* Sanitize nsecs */ attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); inode_set_atime(inode, attr->atime, attr->atimensec); /* mtime from server may be stale due to local buffered write */ if (!(cache_mask & STATX_MTIME)) { inode_set_mtime(inode, attr->mtime, attr->mtimensec); } if (!(cache_mask & STATX_CTIME)) { inode_set_ctime(inode, attr->ctime, attr->ctimensec); } if (sx) { /* Sanitize nsecs */ sx->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); /* * Btime has been queried, cache is valid (whether or not btime * is available or not) so clear STATX_BTIME from inval_mask. * * Availability of the btime attribute is indicated in * FUSE_I_BTIME */ set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); if (sx->mask & STATX_BTIME) { set_bit(FUSE_I_BTIME, &fi->state); fi->i_btime.tv_sec = sx->btime.tv_sec; fi->i_btime.tv_nsec = sx->btime.tv_nsec; } } if (attr->blksize != 0) inode->i_blkbits = ilog2(attr->blksize); else inode->i_blkbits = inode->i_sb->s_blocksize_bits; /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the * check in may_delete(). */ fi->orig_i_mode = inode->i_mode; if (!fc->default_permissions) inode->i_mode &= ~S_ISVTX; fi->orig_ino = attr->ino; /* * We are refreshing inode data and it is possible that another * client set suid/sgid or security.capability xattr. So clear * S_NOSEC. Ideally, we could have cleared it only if suid/sgid * was set or if security.capability xattr was set. But we don't * know if security.capability has been set or not. So clear it * anyway. Its less efficient but should be safe. */ inode->i_flags &= ~S_NOSEC; } u32 fuse_get_cache_mask(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) return 0; return STATX_MTIME | STATX_CTIME | STATX_SIZE; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; spin_lock(&fi->lock); /* * In case of writeback_cache enabled, writes update mtime, ctime and * may update i_size. In these cases trust the cached value in the * inode. */ cache_mask = fuse_get_cache_mask(inode); if (cache_mask & STATX_SIZE) attr->size = i_size_read(inode); if (cache_mask & STATX_MTIME) { attr->mtime = inode_get_mtime_sec(inode); attr->mtimensec = inode_get_mtime_nsec(inode); } if (cache_mask & STATX_CTIME) { attr->ctime = inode_get_ctime_sec(inode); attr->ctimensec = inode_get_ctime_nsec(inode); } if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fi->lock); return; } old_mtime = inode_get_mtime(inode); fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask); oldsize = inode->i_size; /* * In case of writeback_cache enabled, the cached writes beyond EOF * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ if (!(cache_mask & STATX_SIZE)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false; if (oldsize != attr->size) { truncate_pagecache(inode, attr->size); if (!fc->explicit_inval_data) inval = true; } else if (fc->auto_inval_data) { struct timespec64 new_mtime = { .tv_sec = attr->mtime, .tv_nsec = attr->mtimensec, }; /* * Auto inval mode also checks and invalidates if mtime * has changed. */ if (!timespec64_equal(&old_mtime, &new_mtime)) inval = true; } if (inval) invalidate_inode_pages2(inode->i_mapping); } if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_dontcache(inode, attr->flags); } static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, u64 nodeid) { sl->nodeid = nodeid; refcount_set(&sl->count, 1); } static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; inode_set_mtime(inode, attr->mtime, attr->mtimensec); inode_set_ctime(inode, attr->ctime, attr->ctimensec); if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode, attr->flags); } else if (S_ISDIR(inode->i_mode)) fuse_init_dir(inode); else if (S_ISLNK(inode->i_mode)) fuse_init_symlink(inode); else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { fuse_init_common(inode); init_special_inode(inode, inode->i_mode, new_decode_dev(attr->rdev)); } else BUG(); /* * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL * so they see the exact same behavior as before. */ if (!fc->posix_acl) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) return 1; else return 0; } static int fuse_inode_set(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; get_fuse_inode(inode)->nodeid = nodeid; return 0; } struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { struct inode *inode; struct fuse_inode *fi; struct fuse_conn *fc = get_fuse_conn_super(sb); /* * Auto mount points get their node id from the submount root, which is * not a unique identifier within this filesystem. * * To avoid conflicts, do not place submount points into the inode hash * table. */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { struct fuse_inode *fi; inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); fi = get_fuse_inode(inode); fi->nodeid = nodeid; fi->submount_lookup = fuse_alloc_submount_lookup(); if (!fi->submount_lookup) { iput(inode); return NULL; } /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } retry: inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); if (!inode) return NULL; if ((inode->i_state & I_NEW)) { inode->i_flags |= S_NOATIME; if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; fuse_init_inode(inode, attr, fc); unlock_new_inode(inode); } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); iput(inode); goto retry; } fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); return inode; } struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, struct fuse_mount **fm) { struct fuse_mount *fm_iter; struct inode *inode; WARN_ON(!rwsem_is_locked(&fc->killsb)); list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { if (!fm_iter->sb) continue; inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); if (inode) { if (fm) *fm = fm_iter; return inode; } } return NULL; } int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, loff_t offset, loff_t len) { struct fuse_inode *fi; struct inode *inode; pgoff_t pg_start; pgoff_t pg_end; inode = fuse_ilookup(fc, nodeid, NULL); if (!inode) return -ENOENT; fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); spin_unlock(&fi->lock); fuse_invalidate_attr(inode); forget_all_cached_acls(inode); if (offset >= 0) { pg_start = offset >> PAGE_SHIFT; if (len <= 0) pg_end = -1; else pg_end = (offset + len - 1) >> PAGE_SHIFT; invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); } iput(inode); return 0; } bool fuse_lock_inode(struct inode *inode) { bool locked = false; if (!get_fuse_conn(inode)->parallel_dirops) { mutex_lock(&get_fuse_inode(inode)->mutex); locked = true; } return locked; } void fuse_unlock_inode(struct inode *inode, bool locked) { if (locked) mutex_unlock(&get_fuse_inode(inode)->mutex); } static void fuse_umount_begin(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc->no_force_umount) return; fuse_abort_conn(fc); // Only retire block-device-based superblocks. if (sb->s_bdev != NULL) retire_super(sb); } static void fuse_send_destroy(struct fuse_mount *fm) { if (fm->fc->conn_init) { FUSE_ARGS(args); args.opcode = FUSE_DESTROY; args.force = true; args.nocreds = true; fuse_simple_request(fm, &args); } } static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) { stbuf->f_type = FUSE_SUPER_MAGIC; stbuf->f_bsize = attr->bsize; stbuf->f_frsize = attr->frsize; stbuf->f_blocks = attr->blocks; stbuf->f_bfree = attr->bfree; stbuf->f_bavail = attr->bavail; stbuf->f_files = attr->files; stbuf->f_ffree = attr->ffree; stbuf->f_namelen = attr->namelen; /* fsid is left zero */ } static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct fuse_mount *fm = get_fuse_mount_super(sb); FUSE_ARGS(args); struct fuse_statfs_out outarg; int err; if (!fuse_allow_current_process(fm->fc)) { buf->f_type = FUSE_SUPER_MAGIC; return 0; } memset(&outarg, 0, sizeof(outarg)); args.in_numargs = 0; args.opcode = FUSE_STATFS; args.nodeid = get_node_id(d_inode(dentry)); args.out_numargs = 1; args.out_args[0].size = sizeof(outarg); args.out_args[0].value = &outarg; err = fuse_simple_request(fm, &args); if (!err) convert_fuse_statfs(buf, &outarg.st); return err; } static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) { struct fuse_sync_bucket *bucket; bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL); if (bucket) { init_waitqueue_head(&bucket->waitq); /* Initial active count */ atomic_set(&bucket->count, 1); } return bucket; } static void fuse_sync_fs_writes(struct fuse_conn *fc) { struct fuse_sync_bucket *bucket, *new_bucket; int count; new_bucket = fuse_sync_bucket_alloc(); spin_lock(&fc->lock); bucket = rcu_dereference_protected(fc->curr_bucket, 1); count = atomic_read(&bucket->count); WARN_ON(count < 1); /* No outstanding writes? */ if (count == 1) { spin_unlock(&fc->lock); kfree(new_bucket); return; } /* * Completion of new bucket depends on completion of this bucket, so add * one more count. */ atomic_inc(&new_bucket->count); rcu_assign_pointer(fc->curr_bucket, new_bucket); spin_unlock(&fc->lock); /* * Drop initial active count. At this point if all writes in this and * ancestor buckets complete, the count will go to zero and this task * will be woken up. */ atomic_dec(&bucket->count); wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); /* Drop temp count on descendant bucket */ fuse_sync_bucket_dec(new_bucket); kfree_rcu(bucket, rcu); } static int fuse_sync_fs(struct super_block *sb, int wait) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; struct fuse_syncfs_in inarg; FUSE_ARGS(args); int err; /* * Userspace cannot handle the wait == 0 case. Avoid a * gratuitous roundtrip. */ if (!wait) return 0; /* The filesystem is being unmounted. Nothing to do. */ if (!sb->s_root) return 0; if (!fc->sync_fs) return 0; fuse_sync_fs_writes(fc); memset(&inarg, 0, sizeof(inarg)); args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.opcode = FUSE_SYNCFS; args.nodeid = get_node_id(sb->s_root->d_inode); args.out_numargs = 0; err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { fc->sync_fs = 0; err = 0; } return err; } enum { OPT_SOURCE, OPT_SUBTYPE, OPT_FD, OPT_ROOTMODE, OPT_USER_ID, OPT_GROUP_ID, OPT_DEFAULT_PERMISSIONS, OPT_ALLOW_OTHER, OPT_MAX_READ, OPT_BLKSIZE, OPT_ERR }; static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), fsparam_u32 ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), fsparam_u32 ("user_id", OPT_USER_ID), fsparam_u32 ("group_id", OPT_GROUP_ID), fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), fsparam_flag ("allow_other", OPT_ALLOW_OTHER), fsparam_u32 ("max_read", OPT_MAX_READ), fsparam_u32 ("blksize", OPT_BLKSIZE), fsparam_string ("subtype", OPT_SUBTYPE), {} }; static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) { struct fs_parse_result result; struct fuse_fs_context *ctx = fsc->fs_private; int opt; if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { /* * Ignore options coming from mount(MS_REMOUNT) for backward * compatibility. */ if (fsc->oldapi) return 0; return invalfc(fsc, "No changes allowed in reconfigure"); } opt = fs_parse(fsc, fuse_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case OPT_SOURCE: if (fsc->source) return invalfc(fsc, "Multiple sources specified"); fsc->source = param->string; param->string = NULL; break; case OPT_SUBTYPE: if (ctx->subtype) return invalfc(fsc, "Multiple subtypes specified"); ctx->subtype = param->string; param->string = NULL; return 0; case OPT_FD: ctx->fd = result.uint_32; ctx->fd_present = true; break; case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) return invalfc(fsc, "Invalid rootmode"); ctx->rootmode = result.uint_32; ctx->rootmode_present = true; break; case OPT_USER_ID: ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); if (!uid_valid(ctx->user_id)) return invalfc(fsc, "Invalid user_id"); ctx->user_id_present = true; break; case OPT_GROUP_ID: ctx->group_id = make_kgid(fsc->user_ns, result.uint_32); if (!gid_valid(ctx->group_id)) return invalfc(fsc, "Invalid group_id"); ctx->group_id_present = true; break; case OPT_DEFAULT_PERMISSIONS: ctx->default_permissions = true; break; case OPT_ALLOW_OTHER: ctx->allow_other = true; break; case OPT_MAX_READ: ctx->max_read = result.uint_32; break; case OPT_BLKSIZE: if (!ctx->is_bdev) return invalfc(fsc, "blksize only supported for fuseblk"); ctx->blksize = result.uint_32; break; default: return -EINVAL; } return 0; } static void fuse_free_fsc(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; if (ctx) { kfree(ctx->subtype); kfree(ctx); } } static int fuse_show_options(struct seq_file *m, struct dentry *root) { struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc->legacy_opts_show) { seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); if (fc->default_permissions) seq_puts(m, ",default_permissions"); if (fc->allow_other) seq_puts(m, ",allow_other"); if (fc->max_read != ~0) seq_printf(m, ",max_read=%u", fc->max_read); if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) seq_printf(m, ",blksize=%lu", sb->s_blocksize); } #ifdef CONFIG_FUSE_DAX if (fc->dax_mode == FUSE_DAX_ALWAYS) seq_puts(m, ",dax=always"); else if (fc->dax_mode == FUSE_DAX_NEVER) seq_puts(m, ",dax=never"); else if (fc->dax_mode == FUSE_DAX_INODE_USER) seq_puts(m, ",dax=inode"); #endif return 0; } static void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv) { memset(fiq, 0, sizeof(struct fuse_iqueue)); spin_lock_init(&fiq->lock); init_waitqueue_head(&fiq->waitq); INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->interrupts); fiq->forget_list_tail = &fiq->forget_list_head; fiq->connected = 1; fiq->ops = ops; fiq->priv = priv; } static void fuse_pqueue_init(struct fuse_pqueue *fpq) { unsigned int i; spin_lock_init(&fpq->lock); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) INIT_LIST_HEAD(&fpq->processing[i]); INIT_LIST_HEAD(&fpq->io); fpq->connected = 1; } void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, struct user_namespace *user_ns, const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); spin_lock_init(&fc->bg_lock); init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); INIT_LIST_HEAD(&fc->devices); atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; atomic64_set(&fc->khctr, 0); fc->polled_files = RB_ROOT; fc->blocked = 0; fc->initialized = 0; fc->connected = 1; atomic64_set(&fc->attr_version, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = FUSE_MAX_MAX_PAGES; INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); fm->fc = fc; } EXPORT_SYMBOL_GPL(fuse_conn_init); static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); put_user_ns(fc->user_ns); fc->release(fc); } void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { struct fuse_iqueue *fiq = &fc->iq; struct fuse_sync_bucket *bucket; if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); if (fiq->ops->release) fiq->ops->release(fiq); put_pid_ns(fc->pid_ns); bucket = rcu_dereference_protected(fc->curr_bucket, 1); if (bucket) { WARN_ON(atomic_read(&bucket->count) != 1); kfree(bucket); } call_rcu(&fc->rcu, delayed_release); } } EXPORT_SYMBOL_GPL(fuse_conn_put); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) { refcount_inc(&fc->count); return fc; } EXPORT_SYMBOL_GPL(fuse_conn_get); static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) { struct fuse_attr attr; memset(&attr, 0, sizeof(attr)); attr.mode = mode; attr.ino = FUSE_ROOT_ID; attr.nlink = 1; return fuse_iget(sb, 1, 0, &attr, 0, 0); } struct fuse_inode_handle { u64 nodeid; u32 generation; }; static struct dentry *fuse_get_dentry(struct super_block *sb, struct fuse_inode_handle *handle) { struct fuse_conn *fc = get_fuse_conn_super(sb); struct inode *inode; struct dentry *entry; int err = -ESTALE; if (handle->nodeid == 0) goto out_err; inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); if (!inode) { struct fuse_entry_out outarg; const struct qstr name = QSTR_INIT(".", 1); if (!fc->export_support) goto out_err; err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, &inode); if (err && err != -ENOENT) goto out_err; if (err || !inode) { err = -ESTALE; goto out_err; } err = -EIO; if (get_node_id(inode) != handle->nodeid) goto out_iput; } err = -ESTALE; if (inode->i_generation != handle->generation) goto out_iput; entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) fuse_invalidate_entry_cache(entry); return entry; out_iput: iput(inode); out_err: return ERR_PTR(err); } static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, struct inode *parent) { int len = parent ? 6 : 3; u64 nodeid; u32 generation; if (*max_len < len) { *max_len = len; return FILEID_INVALID; } nodeid = get_fuse_inode(inode)->nodeid; generation = inode->i_generation; fh[0] = (u32)(nodeid >> 32); fh[1] = (u32)(nodeid & 0xffffffff); fh[2] = generation; if (parent) { nodeid = get_fuse_inode(parent)->nodeid; generation = parent->i_generation; fh[3] = (u32)(nodeid >> 32); fh[4] = (u32)(nodeid & 0xffffffff); fh[5] = generation; } *max_len = len; return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct fuse_inode_handle handle; if ((fh_type != FILEID_INO64_GEN && fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) return NULL; handle.nodeid = (u64) fid->raw[0] << 32; handle.nodeid |= (u64) fid->raw[1]; handle.generation = fid->raw[2]; return fuse_get_dentry(sb, &handle); } static struct dentry *fuse_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct fuse_inode_handle parent; if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) return NULL; parent.nodeid = (u64) fid->raw[3] << 32; parent.nodeid |= (u64) fid->raw[4]; parent.generation = fid->raw[5]; return fuse_get_dentry(sb, &parent); } static struct dentry *fuse_get_parent(struct dentry *child) { struct inode *child_inode = d_inode(child); struct fuse_conn *fc = get_fuse_conn(child_inode); struct inode *inode; struct dentry *parent; struct fuse_entry_out outarg; int err; if (!fc->export_support) return ERR_PTR(-ESTALE); err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), &dotdot_name, &outarg, &inode); if (err) { if (err == -ENOENT) return ERR_PTR(-ESTALE); return ERR_PTR(err); } parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) fuse_invalidate_entry_cache(parent); return parent; } static const struct export_operations fuse_export_operations = { .fh_to_dentry = fuse_fh_to_dentry, .fh_to_parent = fuse_fh_to_parent, .encode_fh = fuse_encode_fh, .get_parent = fuse_get_parent, }; static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .free_inode = fuse_free_inode, .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .sync_fs = fuse_sync_fs, .show_options = fuse_show_options, }; static void sanitize_global_limit(unsigned *limit) { /* * The default maximum number of async requests is calculated to consume * 1/2^13 of the total memory, assuming 392 bytes per request. */ if (*limit == 0) *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; if (*limit >= 1 << 16) *limit = (1 << 16) - 1; } static int set_global_limit(const char *val, const struct kernel_param *kp) { int rv; rv = param_set_uint(val, kp); if (rv) return rv; sanitize_global_limit((unsigned *)kp->arg); return 0; } static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) { int cap_sys_admin = capable(CAP_SYS_ADMIN); if (arg->minor < 13) return; sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); spin_lock(&fc->bg_lock); if (arg->max_background) { fc->max_background = arg->max_background; if (!cap_sys_admin && fc->max_background > max_user_bgreq) fc->max_background = max_user_bgreq; } if (arg->congestion_threshold) { fc->congestion_threshold = arg->congestion_threshold; if (!cap_sys_admin && fc->congestion_threshold > max_user_congthresh) fc->congestion_threshold = max_user_congthresh; } spin_unlock(&fc->bg_lock); } struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; struct fuse_init_out out; }; static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, int error) { struct fuse_conn *fc = fm->fc; struct fuse_init_args *ia = container_of(args, typeof(*ia), args); struct fuse_init_out *arg = &ia->out; bool ok = true; if (error || arg->major != FUSE_KERNEL_VERSION) ok = false; else { unsigned long ra_pages; process_init_limits(fc, arg); if (arg->minor >= 6) { u64 flags = arg->flags; if (flags & FUSE_INIT_EXT) flags |= (u64) arg->flags2 << 32; ra_pages = arg->max_readahead / PAGE_SIZE; if (flags & FUSE_ASYNC_READ) fc->async_read = 1; if (!(flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; if (arg->minor >= 17) { if (!(flags & FUSE_FLOCK_LOCKS)) fc->no_flock = 1; } else { if (!(flags & FUSE_POSIX_LOCKS)) fc->no_flock = 1; } if (flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; if (arg->minor >= 9) { /* LOOKUP has dependency on proto version */ if (flags & FUSE_EXPORT_SUPPORT) fc->export_support = 1; } if (flags & FUSE_BIG_WRITES) fc->big_writes = 1; if (flags & FUSE_DONT_MASK) fc->dont_mask = 1; if (flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; else if (flags & FUSE_EXPLICIT_INVAL_DATA) fc->explicit_inval_data = 1; if (flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; if (flags & FUSE_READDIRPLUS_AUTO) fc->readdirplus_auto = 1; } if (flags & FUSE_ASYNC_DIO) fc->async_dio = 1; if (flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; if (flags & FUSE_PARALLEL_DIROPS) fc->parallel_dirops = 1; if (flags & FUSE_HANDLE_KILLPRIV) fc->handle_killpriv = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fm->sb->s_time_gran = arg->time_gran; if ((flags & FUSE_POSIX_ACL)) { fc->default_permissions = 1; fc->posix_acl = 1; } if (flags & FUSE_CACHE_SYMLINKS) fc->cache_symlinks = 1; if (flags & FUSE_ABORT_ERROR) fc->abort_err = 1; if (flags & FUSE_MAX_PAGES) { fc->max_pages = min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); } if (IS_ENABLED(CONFIG_FUSE_DAX)) { if (flags & FUSE_MAP_ALIGNMENT && !fuse_dax_check_alignment(fc, arg->map_alignment)) { ok = false; } if (flags & FUSE_HAS_INODE_DAX) fc->inode_dax = 1; } if (flags & FUSE_HANDLE_KILLPRIV_V2) { fc->handle_killpriv_v2 = 1; fm->sb->s_flags |= SB_NOSEC; } if (flags & FUSE_SETXATTR_EXT) fc->setxattr_ext = 1; if (flags & FUSE_SECURITY_CTX) fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) fc->direct_io_allow_mmap = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } kfree(ia); if (!ok) { fc->conn_init = 0; fc->conn_error = 1; } fuse_set_initialized(fc); wake_up_all(&fc->blocked_waitq); } void fuse_send_init(struct fuse_mount *fm) { struct fuse_init_args *ia; u64 flags; ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); ia->in.major = FUSE_KERNEL_VERSION; ia->in.minor = FUSE_KERNEL_MINOR_VERSION; ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; if (fuse_is_inode_dax_mode(fm->fc->dax_mode)) flags |= FUSE_HAS_INODE_DAX; #endif if (fm->fc->auto_submounts) flags |= FUSE_SUBMOUNTS; ia->in.flags = flags; ia->in.flags2 = flags >> 32; ia->args.opcode = FUSE_INIT; ia->args.in_numargs = 1; ia->args.in_args[0].size = sizeof(ia->in); ia->args.in_args[0].value = &ia->in; ia->args.out_numargs = 1; /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed by do_get_request(), so a short reply is not a problem */ ia->args.out_argvar = true; ia->args.out_args[0].size = sizeof(ia->out); ia->args.out_args[0].value = &ia->out; ia->args.force = true; ia->args.nocreds = true; ia->args.end = process_init_reply; if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) process_init_reply(fm, &ia->args, -ENOTCONN); } EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { WARN_ON(!list_empty(&fc->devices)); kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) { int err; char *suffix = ""; if (sb->s_bdev) { suffix = "-fuseblk"; /* * sb->s_bdi points to blkdev's bdi however we want to redirect * it to our private bdi... */ bdi_put(sb->s_bdi); sb->s_bdi = &noop_backing_dev_info; } err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), MINOR(fc->dev), suffix); if (err) return err; /* fuse does it's own writeback accounting */ sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; /* * For a single fuse filesystem use max 1% of dirty + * writeback threshold. * * This gives about 1M of write buffer for memory maps on a * machine with 1G and 10% dirty_ratio, which should be more * than enough. * * Privileged users can raise it by writing to * * /sys/class/bdi/<bdi>/max_ratio */ bdi_set_max_ratio(sb->s_bdi, 1); return 0; } struct fuse_dev *fuse_dev_alloc(void) { struct fuse_dev *fud; struct list_head *pq; fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); if (!fud) return NULL; pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!pq) { kfree(fud); return NULL; } fud->pq.processing = pq; fuse_pqueue_init(&fud->pq); return fud; } EXPORT_SYMBOL_GPL(fuse_dev_alloc); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) { fud->fc = fuse_conn_get(fc); spin_lock(&fc->lock); list_add_tail(&fud->entry, &fc->devices); spin_unlock(&fc->lock); } EXPORT_SYMBOL_GPL(fuse_dev_install); struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) { struct fuse_dev *fud; fud = fuse_dev_alloc(); if (!fud) return NULL; fuse_dev_install(fud, fc); return fud; } EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); void fuse_dev_free(struct fuse_dev *fud) { struct fuse_conn *fc = fud->fc; if (fc) { spin_lock(&fc->lock); list_del(&fud->entry); spin_unlock(&fc->lock); fuse_conn_put(fc); } kfree(fud->pq.processing); kfree(fud); } EXPORT_SYMBOL_GPL(fuse_dev_free); static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) { struct timespec64 atime = inode_get_atime(&fi->inode); struct timespec64 mtime = inode_get_mtime(&fi->inode); struct timespec64 ctime = inode_get_ctime(&fi->inode); *attr = (struct fuse_attr){ .ino = fi->inode.i_ino, .size = fi->inode.i_size, .blocks = fi->inode.i_blocks, .atime = atime.tv_sec, .mtime = mtime.tv_sec, .ctime = ctime.tv_sec, .atimensec = atime.tv_nsec, .mtimensec = mtime.tv_nsec, .ctimensec = ctime.tv_nsec, .mode = fi->inode.i_mode, .nlink = fi->inode.i_nlink, .uid = fi->inode.i_uid.val, .gid = fi->inode.i_gid.val, .rdev = fi->inode.i_rdev, .blksize = 1u << fi->inode.i_blkbits, }; } static void fuse_sb_defaults(struct super_block *sb) { sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_xattr = fuse_xattr_handlers; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_export_op = &fuse_export_operations; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); } static int fuse_fill_super_submount(struct super_block *sb, struct fuse_inode *parent_fi) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct super_block *parent_sb = parent_fi->inode.i_sb; struct fuse_attr root_attr; struct inode *root; struct fuse_submount_lookup *sl; struct fuse_inode *fi; fuse_sb_defaults(sb); fm->sb = sb; WARN_ON(sb->s_bdi != &noop_backing_dev_info); sb->s_bdi = bdi_get(parent_sb->s_bdi); sb->s_xattr = parent_sb->s_xattr; sb->s_time_gran = parent_sb->s_time_gran; sb->s_blocksize = parent_sb->s_blocksize; sb->s_blocksize_bits = parent_sb->s_blocksize_bits; sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); if (parent_sb->s_subtype && !sb->s_subtype) return -ENOMEM; fuse_fill_attr_from_inode(&root_attr, parent_fi); root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); /* * This inode is just a duplicate, so it is not looked up and * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here. */ fi = get_fuse_inode(root); fi->nlookup--; sb->s_d_op = &fuse_dentry_operations; sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; /* * Grab the parent's submount_lookup pointer and take a * reference on the shared nlookup from the parent. This is to * prevent the last forget for this nodeid from getting * triggered until all users have finished with it. */ sl = parent_fi->submount_lookup; WARN_ON(!sl); if (sl) { refcount_inc(&sl->count); fi->submount_lookup = sl; } return 0; } /* Filesystem context private data holds the FUSE inode of the mount point */ static int fuse_get_tree_submount(struct fs_context *fsc) { struct fuse_mount *fm; struct fuse_inode *mp_fi = fsc->fs_private; struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode); struct super_block *sb; int err; fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); if (!fm) return -ENOMEM; fm->fc = fuse_conn_get(fc); fsc->s_fs_info = fm; sb = sget_fc(fsc, NULL, set_anon_super_fc); if (fsc->s_fs_info) fuse_mount_destroy(fm); if (IS_ERR(sb)) return PTR_ERR(sb); /* Initialize superblock, making @mp_fi its root */ err = fuse_fill_super_submount(sb, mp_fi); if (err) { deactivate_locked_super(sb); return err; } down_write(&fc->killsb); list_add_tail(&fm->fc_entry, &fc->mounts); up_write(&fc->killsb); sb->s_flags |= SB_ACTIVE; fsc->root = dget(sb->s_root); return 0; } static const struct fs_context_operations fuse_context_submount_ops = { .get_tree = fuse_get_tree_submount, }; int fuse_init_fs_context_submount(struct fs_context *fsc) { fsc->ops = &fuse_context_submount_ops; return 0; } EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) { struct fuse_dev *fud = NULL; struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; struct inode *root; struct dentry *root_dentry; int err; err = -EINVAL; if (sb->s_flags & SB_MANDLOCK) goto err; rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc()); fuse_sb_defaults(sb); if (ctx->is_bdev) { #ifdef CONFIG_BLOCK err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; } sb->s_subtype = ctx->subtype; ctx->subtype = NULL; if (IS_ENABLED(CONFIG_FUSE_DAX)) { err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev); if (err) goto err; } if (ctx->fudptr) { err = -ENOMEM; fud = fuse_dev_alloc_install(fc); if (!fud) goto err_free_dax; } fc->dev = sb->s_dev; fm->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_dev_free; /* Handle umasking inside the fuse code */ if (sb->s_flags & SB_POSIXACL) fc->dont_mask = 1; sb->s_flags |= SB_POSIXACL; fc->default_permissions = ctx->default_permissions; fc->allow_other = ctx->allow_other; fc->user_id = ctx->user_id; fc->group_id = ctx->group_id; fc->legacy_opts_show = ctx->legacy_opts_show; fc->max_read = max_t(unsigned int, 4096, ctx->max_read); fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; mutex_lock(&fuse_mutex); err = -EINVAL; if (ctx->fudptr && *ctx->fudptr) goto err_unlock; err = fuse_ctl_add_conn(fc); if (err) goto err_unlock; list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; if (ctx->fudptr) *ctx->fudptr = fud; mutex_unlock(&fuse_mutex); return 0; err_unlock: mutex_unlock(&fuse_mutex); dput(root_dentry); err_dev_free: if (fud) fuse_dev_free(fud); err_free_dax: if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); err: return err; } EXPORT_SYMBOL_GPL(fuse_fill_super_common); static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; int err; if (!ctx->file || !ctx->rootmode_present || !ctx->user_id_present || !ctx->group_id_present) return -EINVAL; /* * Require mount to happen from the same user namespace which * opened /dev/fuse to prevent potential attacks. */ if ((ctx->file->f_op != &fuse_dev_operations) || (ctx->file->f_cred->user_ns != sb->s_user_ns)) return -EINVAL; ctx->fudptr = &ctx->file->private_data; err = fuse_fill_super_common(sb, ctx); if (err) return err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); fuse_send_init(get_fuse_mount_super(sb)); return 0; } /* * This is the path where user supplied an already initialized fuse dev. In * this case never create a new super if the old one is gone. */ static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) { return -ENOTCONN; } static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) { return fsc->sget_key == get_fuse_conn_super(sb); } static int fuse_get_tree(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; struct fuse_dev *fud; struct fuse_conn *fc; struct fuse_mount *fm; struct super_block *sb; int err; fc = kmalloc(sizeof(*fc), GFP_KERNEL); if (!fc) return -ENOMEM; fm = kzalloc(sizeof(*fm), GFP_KERNEL); if (!fm) { kfree(fc); return -ENOMEM; } fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); fc->release = fuse_free_conn; fsc->s_fs_info = fm; if (ctx->fd_present) ctx->file = fget(ctx->fd); if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { err = get_tree_bdev(fsc, fuse_fill_super); goto out; } /* * While block dev mount can be initialized with a dummy device fd * (found by device name), normal fuse mounts can't */ err = -EINVAL; if (!ctx->file) goto out; /* * Allow creating a fuse mount with an already initialized fuse * connection */ fud = READ_ONCE(ctx->file->private_data); if (ctx->file->f_op == &fuse_dev_operations && fud) { fsc->sget_key = fud->fc; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); err = PTR_ERR_OR_ZERO(sb); if (!IS_ERR(sb)) fsc->root = dget(sb->s_root); } else { err = get_tree_nodev(fsc, fuse_fill_super); } out: if (fsc->s_fs_info) fuse_mount_destroy(fm); if (ctx->file) fput(ctx->file); return err; } static const struct fs_context_operations fuse_context_ops = { .free = fuse_free_fsc, .parse_param = fuse_parse_param, .reconfigure = fuse_reconfigure, .get_tree = fuse_get_tree, }; /* * Set up the filesystem mount context. */ static int fuse_init_fs_context(struct fs_context *fsc) { struct fuse_fs_context *ctx; ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->max_read = ~0; ctx->blksize = FUSE_DEFAULT_BLKSIZE; ctx->legacy_opts_show = true; #ifdef CONFIG_BLOCK if (fsc->fs_type == &fuseblk_fs_type) { ctx->is_bdev = true; ctx->destroy = true; } #endif fsc->fs_private = ctx; fsc->ops = &fuse_context_ops; return 0; } bool fuse_mount_remove(struct fuse_mount *fm) { struct fuse_conn *fc = fm->fc; bool last = false; down_write(&fc->killsb); list_del_init(&fm->fc_entry); if (list_empty(&fc->mounts)) last = true; up_write(&fc->killsb); return last; } EXPORT_SYMBOL_GPL(fuse_mount_remove); void fuse_conn_destroy(struct fuse_mount *fm) { struct fuse_conn *fc = fm->fc; if (fc->destroy) fuse_send_destroy(fm); fuse_abort_conn(fc); fuse_wait_aborted(fc); if (!list_empty(&fc->entry)) { mutex_lock(&fuse_mutex); list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); } } EXPORT_SYMBOL_GPL(fuse_conn_destroy); static void fuse_sb_destroy(struct super_block *sb) { struct fuse_mount *fm = get_fuse_mount_super(sb); bool last; if (sb->s_root) { last = fuse_mount_remove(fm); if (last) fuse_conn_destroy(fm); } } void fuse_mount_destroy(struct fuse_mount *fm) { fuse_conn_put(fm->fc); kfree_rcu(fm, rcu); } EXPORT_SYMBOL(fuse_mount_destroy); static void fuse_kill_sb_anon(struct super_block *sb) { fuse_sb_destroy(sb); kill_anon_super(sb); fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_anon, }; MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK static void fuse_kill_sb_blk(struct super_block *sb) { fuse_sb_destroy(sb); kill_block_super(sb); fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; MODULE_ALIAS_FS("fuseblk"); static inline int register_fuseblk(void) { return register_filesystem(&fuseblk_fs_type); } static inline void unregister_fuseblk(void) { unregister_filesystem(&fuseblk_fs_type); } #else static inline int register_fuseblk(void) { return 0; } static inline void unregister_fuseblk(void) { } #endif static void fuse_inode_init_once(void *foo) { struct inode *inode = foo; inode_init_once(inode); } static int __init fuse_fs_init(void) { int err; fuse_inode_cachep = kmem_cache_create("fuse_inode", sizeof(struct fuse_inode), 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, fuse_inode_init_once); err = -ENOMEM; if (!fuse_inode_cachep) goto out; err = register_fuseblk(); if (err) goto out2; err = register_filesystem(&fuse_fs_type); if (err) goto out3; return 0; out3: unregister_fuseblk(); out2: kmem_cache_destroy(fuse_inode_cachep); out: return err; } static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(fuse_inode_cachep); } static struct kobject *fuse_kobj; static int fuse_sysfs_init(void) { int err; fuse_kobj = kobject_create_and_add("fuse", fs_kobj); if (!fuse_kobj) { err = -ENOMEM; goto out_err; } err = sysfs_create_mount_point(fuse_kobj, "connections"); if (err) goto out_fuse_unregister; return 0; out_fuse_unregister: kobject_put(fuse_kobj); out_err: return err; } static void fuse_sysfs_cleanup(void) { sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } static int __init fuse_init(void) { int res; pr_info("init (API version %i.%i)\n", FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); INIT_LIST_HEAD(&fuse_conn_list); res = fuse_fs_init(); if (res) goto err; res = fuse_dev_init(); if (res) goto err_fs_cleanup; res = fuse_sysfs_init(); if (res) goto err_dev_cleanup; res = fuse_ctl_init(); if (res) goto err_sysfs_cleanup; sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); return 0; err_sysfs_cleanup: fuse_sysfs_cleanup(); err_dev_cleanup: fuse_dev_cleanup(); err_fs_cleanup: fuse_fs_cleanup(); err: return res; } static void __exit fuse_exit(void) { pr_debug("exit\n"); fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); fuse_dev_cleanup(); } module_init(fuse_init); module_exit(fuse_exit); |
7 268 98 291 55 1 285 89 266 7 97 170 286 170 218 102 2 101 98 7 98 48 98 97 6 7 89 268 290 98 54 2 279 102 291 318 58 282 317 317 39 8 32 32 32 30 1 21 18 85 113 183 9 165 10 10 164 157 3 154 179 6 173 105 52 109 4 55 3 36 188 187 166 80 165 80 188 10 177 6 2 98 27 27 17 10 14 14 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Symmetric key cipher operations. * * Generic encrypt/decrypt wrapper for ciphers, handles operations across * multiple page boundaries by using temporary blocks. In user context, * the kernel is given a chance to schedule us once per page. * * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/aead.h> #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/bug.h> #include <linux/cryptouser.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <net/netlink.h> #include "skcipher.h" #define CRYPTO_ALG_TYPE_SKCIPHER_MASK 0x0000000e enum { SKCIPHER_WALK_PHYS = 1 << 0, SKCIPHER_WALK_SLOW = 1 << 1, SKCIPHER_WALK_COPY = 1 << 2, SKCIPHER_WALK_DIFF = 1 << 3, SKCIPHER_WALK_SLEEP = 1 << 4, }; struct skcipher_walk_buffer { struct list_head entry; struct scatter_walk dst; unsigned int len; u8 *data; u8 buffer[]; }; static const struct crypto_type crypto_skcipher_type; static int skcipher_walk_next(struct skcipher_walk *walk); static inline void skcipher_map_src(struct skcipher_walk *walk) { walk->src.virt.addr = scatterwalk_map(&walk->in); } static inline void skcipher_map_dst(struct skcipher_walk *walk) { walk->dst.virt.addr = scatterwalk_map(&walk->out); } static inline void skcipher_unmap_src(struct skcipher_walk *walk) { scatterwalk_unmap(walk->src.virt.addr); } static inline void skcipher_unmap_dst(struct skcipher_walk *walk) { scatterwalk_unmap(walk->dst.virt.addr); } static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk) { return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC; } /* Get a spot of the specified length that does not straddle a page. * The caller needs to ensure that there is enough space for this operation. */ static inline u8 *skcipher_get_spot(u8 *start, unsigned int len) { u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK); return max(start, end_page); } static inline struct skcipher_alg *__crypto_skcipher_alg( struct crypto_alg *alg) { return container_of(alg, struct skcipher_alg, base); } static inline struct crypto_istat_cipher *skcipher_get_stat( struct skcipher_alg *alg) { return skcipher_get_stat_common(&alg->co); } static inline int crypto_skcipher_errstat(struct skcipher_alg *alg, int err) { struct crypto_istat_cipher *istat = skcipher_get_stat(alg); if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) return err; if (err && err != -EINPROGRESS && err != -EBUSY) atomic64_inc(&istat->err_cnt); return err; } static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) { u8 *addr; addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = skcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); return 0; } int skcipher_walk_done(struct skcipher_walk *walk, int err) { unsigned int n = walk->nbytes; unsigned int nbytes = 0; if (!n) goto finish; if (likely(err >= 0)) { n -= err; nbytes = walk->total - n; } if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | SKCIPHER_WALK_DIFF)))) { unmap_src: skcipher_unmap_src(walk); } else if (walk->flags & SKCIPHER_WALK_DIFF) { skcipher_unmap_dst(walk); goto unmap_src; } else if (walk->flags & SKCIPHER_WALK_COPY) { skcipher_map_dst(walk); memcpy(walk->dst.virt.addr, walk->page, n); skcipher_unmap_dst(walk); } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { if (err > 0) { /* * Didn't process all bytes. Either the algorithm is * broken, or this was the last step and it turned out * the message wasn't evenly divisible into blocks but * the algorithm requires it. */ err = -EINVAL; nbytes = 0; } else n = skcipher_done_slow(walk, n); } if (err > 0) err = 0; walk->total = nbytes; walk->nbytes = 0; scatterwalk_advance(&walk->in, n); scatterwalk_advance(&walk->out, n); scatterwalk_done(&walk->in, 0, nbytes); scatterwalk_done(&walk->out, 1, nbytes); if (nbytes) { crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ? CRYPTO_TFM_REQ_MAY_SLEEP : 0); return skcipher_walk_next(walk); } finish: /* Short-circuit for the common/fast path. */ if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) goto out; if (walk->flags & SKCIPHER_WALK_PHYS) goto out; if (walk->iv != walk->oiv) memcpy(walk->oiv, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) free_page((unsigned long)walk->page); out: return err; } EXPORT_SYMBOL_GPL(skcipher_walk_done); void skcipher_walk_complete(struct skcipher_walk *walk, int err) { struct skcipher_walk_buffer *p, *tmp; list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { u8 *data; if (err) goto done; data = p->data; if (!data) { data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); data = skcipher_get_spot(data, walk->stride); } scatterwalk_copychunks(data, &p->dst, p->len, 1); if (offset_in_page(p->data) + p->len + walk->stride > PAGE_SIZE) free_page((unsigned long)p->data); done: list_del(&p->entry); kfree(p); } if (!err && walk->iv != walk->oiv) memcpy(walk->oiv, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) free_page((unsigned long)walk->page); } EXPORT_SYMBOL_GPL(skcipher_walk_complete); static void skcipher_queue_write(struct skcipher_walk *walk, struct skcipher_walk_buffer *p) { p->dst = walk->out; list_add_tail(&p->entry, &walk->buffers); } static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize) { bool phys = walk->flags & SKCIPHER_WALK_PHYS; unsigned alignmask = walk->alignmask; struct skcipher_walk_buffer *p; unsigned a; unsigned n; u8 *buffer; void *v; if (!phys) { if (!walk->buffer) walk->buffer = walk->page; buffer = walk->buffer; if (buffer) goto ok; } /* Start with the minimum alignment of kmalloc. */ a = crypto_tfm_ctx_alignment() - 1; n = bsize; if (phys) { /* Calculate the minimum alignment of p->buffer. */ a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1; n += sizeof(*p); } /* Minimum size to align p->buffer by alignmask. */ n += alignmask & ~a; /* Minimum size to ensure p->buffer does not straddle a page. */ n += (bsize - 1) & ~(alignmask | a); v = kzalloc(n, skcipher_walk_gfp(walk)); if (!v) return skcipher_walk_done(walk, -ENOMEM); if (phys) { p = v; p->len = bsize; skcipher_queue_write(walk, p); buffer = p->buffer; } else { walk->buffer = v; buffer = v; } ok: walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1); walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize); walk->src.virt.addr = walk->dst.virt.addr; scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0); walk->nbytes = bsize; walk->flags |= SKCIPHER_WALK_SLOW; return 0; } static int skcipher_next_copy(struct skcipher_walk *walk) { struct skcipher_walk_buffer *p; u8 *tmp = walk->page; skcipher_map_src(walk); memcpy(tmp, walk->src.virt.addr, walk->nbytes); skcipher_unmap_src(walk); walk->src.virt.addr = tmp; walk->dst.virt.addr = tmp; if (!(walk->flags & SKCIPHER_WALK_PHYS)) return 0; p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk)); if (!p) return -ENOMEM; p->data = walk->page; p->len = walk->nbytes; skcipher_queue_write(walk, p); if (offset_in_page(walk->page) + walk->nbytes + walk->stride > PAGE_SIZE) walk->page = NULL; else walk->page += walk->nbytes; return 0; } static int skcipher_next_fast(struct skcipher_walk *walk) { unsigned long diff; walk->src.phys.page = scatterwalk_page(&walk->in); walk->src.phys.offset = offset_in_page(walk->in.offset); walk->dst.phys.page = scatterwalk_page(&walk->out); walk->dst.phys.offset = offset_in_page(walk->out.offset); if (walk->flags & SKCIPHER_WALK_PHYS) return 0; diff = walk->src.phys.offset - walk->dst.phys.offset; diff |= walk->src.virt.page - walk->dst.virt.page; skcipher_map_src(walk); walk->dst.virt.addr = walk->src.virt.addr; if (diff) { walk->flags |= SKCIPHER_WALK_DIFF; skcipher_map_dst(walk); } return 0; } static int skcipher_walk_next(struct skcipher_walk *walk) { unsigned int bsize; unsigned int n; int err; walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | SKCIPHER_WALK_DIFF); n = walk->total; bsize = min(walk->stride, max(n, walk->blocksize)); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); if (unlikely(n < bsize)) { if (unlikely(walk->total < walk->blocksize)) return skcipher_walk_done(walk, -EINVAL); slow_path: err = skcipher_next_slow(walk, bsize); goto set_phys_lowmem; } if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) { if (!walk->page) { gfp_t gfp = skcipher_walk_gfp(walk); walk->page = (void *)__get_free_page(gfp); if (!walk->page) goto slow_path; } walk->nbytes = min_t(unsigned, n, PAGE_SIZE - offset_in_page(walk->page)); walk->flags |= SKCIPHER_WALK_COPY; err = skcipher_next_copy(walk); goto set_phys_lowmem; } walk->nbytes = n; return skcipher_next_fast(walk); set_phys_lowmem: if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) { walk->src.phys.page = virt_to_page(walk->src.virt.addr); walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); walk->src.phys.offset &= PAGE_SIZE - 1; walk->dst.phys.offset &= PAGE_SIZE - 1; } return err; } static int skcipher_copy_iv(struct skcipher_walk *walk) { unsigned a = crypto_tfm_ctx_alignment() - 1; unsigned alignmask = walk->alignmask; unsigned ivsize = walk->ivsize; unsigned bs = walk->stride; unsigned aligned_bs; unsigned size; u8 *iv; aligned_bs = ALIGN(bs, alignmask + 1); /* Minimum size to align buffer by alignmask. */ size = alignmask & ~a; if (walk->flags & SKCIPHER_WALK_PHYS) size += ivsize; else { size += aligned_bs + ivsize; /* Minimum size to ensure buffer does not straddle a page. */ size += (bs - 1) & ~(alignmask | a); } walk->buffer = kmalloc(size, skcipher_walk_gfp(walk)); if (!walk->buffer) return -ENOMEM; iv = PTR_ALIGN(walk->buffer, alignmask + 1); iv = skcipher_get_spot(iv, bs) + aligned_bs; walk->iv = memcpy(iv, walk->iv, walk->ivsize); return 0; } static int skcipher_walk_first(struct skcipher_walk *walk) { if (WARN_ON_ONCE(in_hardirq())) return -EDEADLK; walk->buffer = NULL; if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { int err = skcipher_copy_iv(walk); if (err) return err; } walk->page = NULL; return skcipher_walk_next(walk); } static int skcipher_walk_skcipher(struct skcipher_walk *walk, struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); walk->total = req->cryptlen; walk->nbytes = 0; walk->iv = req->iv; walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; walk->blocksize = crypto_skcipher_blocksize(tfm); walk->ivsize = crypto_skcipher_ivsize(tfm); walk->alignmask = crypto_skcipher_alignmask(tfm); if (alg->co.base.cra_type != &crypto_skcipher_type) walk->stride = alg->co.chunksize; else walk->stride = alg->walksize; return skcipher_walk_first(walk); } int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, bool atomic) { int err; might_sleep_if(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); walk->flags &= ~SKCIPHER_WALK_PHYS; err = skcipher_walk_skcipher(walk, req); walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0; return err; } EXPORT_SYMBOL_GPL(skcipher_walk_virt); int skcipher_walk_async(struct skcipher_walk *walk, struct skcipher_request *req) { walk->flags |= SKCIPHER_WALK_PHYS; INIT_LIST_HEAD(&walk->buffers); return skcipher_walk_skcipher(walk, req); } EXPORT_SYMBOL_GPL(skcipher_walk_async); static int skcipher_walk_aead_common(struct skcipher_walk *walk, struct aead_request *req, bool atomic) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); int err; walk->nbytes = 0; walk->iv = req->iv; walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; walk->flags &= ~SKCIPHER_WALK_PHYS; scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2); scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2); scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->blocksize = crypto_aead_blocksize(tfm); walk->stride = crypto_aead_chunksize(tfm); walk->ivsize = crypto_aead_ivsize(tfm); walk->alignmask = crypto_aead_alignmask(tfm); err = skcipher_walk_first(walk); if (atomic) walk->flags &= ~SKCIPHER_WALK_SLEEP; return err; } int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic) { walk->total = req->cryptlen; return skcipher_walk_aead_common(walk, req, atomic); } EXPORT_SYMBOL_GPL(skcipher_walk_aead_encrypt); int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); walk->total = req->cryptlen - crypto_aead_authsize(tfm); return skcipher_walk_aead_common(walk, req, atomic); } EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt); static void skcipher_set_needkey(struct crypto_skcipher *tfm) { if (crypto_skcipher_max_keysize(tfm) != 0) crypto_skcipher_set_flags(tfm, CRYPTO_TFM_NEED_KEY); } static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { unsigned long alignmask = crypto_skcipher_alignmask(tfm); struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); u8 *buffer, *alignbuffer; unsigned long absize; int ret; absize = keylen + alignmask; buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cipher->setkey(tfm, alignbuffer, keylen); kfree_sensitive(buffer); return ret; } int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); unsigned long alignmask = crypto_skcipher_alignmask(tfm); int err; if (cipher->co.base.cra_type != &crypto_skcipher_type) { struct crypto_lskcipher **ctx = crypto_skcipher_ctx(tfm); crypto_lskcipher_clear_flags(*ctx, CRYPTO_TFM_REQ_MASK); crypto_lskcipher_set_flags(*ctx, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_lskcipher_setkey(*ctx, key, keylen); goto out; } if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) return -EINVAL; if ((unsigned long)key & alignmask) err = skcipher_setkey_unaligned(tfm, key, keylen); else err = cipher->setkey(tfm, key, keylen); out: if (unlikely(err)) { skcipher_set_needkey(tfm); return err; } crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; } EXPORT_SYMBOL_GPL(crypto_skcipher_setkey); int crypto_skcipher_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); int ret; if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { struct crypto_istat_cipher *istat = skcipher_get_stat(alg); atomic64_inc(&istat->encrypt_cnt); atomic64_add(req->cryptlen, &istat->encrypt_tlen); } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else if (alg->co.base.cra_type != &crypto_skcipher_type) ret = crypto_lskcipher_encrypt_sg(req); else ret = alg->encrypt(req); return crypto_skcipher_errstat(alg, ret); } EXPORT_SYMBOL_GPL(crypto_skcipher_encrypt); int crypto_skcipher_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); int ret; if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { struct crypto_istat_cipher *istat = skcipher_get_stat(alg); atomic64_inc(&istat->decrypt_cnt); atomic64_add(req->cryptlen, &istat->decrypt_tlen); } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else if (alg->co.base.cra_type != &crypto_skcipher_type) ret = crypto_lskcipher_decrypt_sg(req); else ret = alg->decrypt(req); return crypto_skcipher_errstat(alg, ret); } EXPORT_SYMBOL_GPL(crypto_skcipher_decrypt); static int crypto_lskcipher_export(struct skcipher_request *req, void *out) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); u8 *ivs = skcipher_request_ctx(req); ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(tfm) + 1); memcpy(out, ivs + crypto_skcipher_ivsize(tfm), crypto_skcipher_statesize(tfm)); return 0; } static int crypto_lskcipher_import(struct skcipher_request *req, const void *in) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); u8 *ivs = skcipher_request_ctx(req); ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(tfm) + 1); memcpy(ivs + crypto_skcipher_ivsize(tfm), in, crypto_skcipher_statesize(tfm)); return 0; } static int skcipher_noexport(struct skcipher_request *req, void *out) { return 0; } static int skcipher_noimport(struct skcipher_request *req, const void *in) { return 0; } int crypto_skcipher_export(struct skcipher_request *req, void *out) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); if (alg->co.base.cra_type != &crypto_skcipher_type) return crypto_lskcipher_export(req, out); return alg->export(req, out); } EXPORT_SYMBOL_GPL(crypto_skcipher_export); int crypto_skcipher_import(struct skcipher_request *req, const void *in) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); if (alg->co.base.cra_type != &crypto_skcipher_type) return crypto_lskcipher_import(req, in); return alg->import(req, in); } EXPORT_SYMBOL_GPL(crypto_skcipher_import); static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); alg->exit(skcipher); } static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); skcipher_set_needkey(skcipher); if (tfm->__crt_alg->cra_type != &crypto_skcipher_type) { unsigned am = crypto_skcipher_alignmask(skcipher); unsigned reqsize; reqsize = am & ~(crypto_tfm_ctx_alignment() - 1); reqsize += crypto_skcipher_ivsize(skcipher); reqsize += crypto_skcipher_statesize(skcipher); crypto_skcipher_set_reqsize(skcipher, reqsize); return crypto_init_lskcipher_ops_sg(tfm); } if (alg->exit) skcipher->base.exit = crypto_skcipher_exit_tfm; if (alg->init) return alg->init(skcipher); return 0; } static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) { if (alg->cra_type != &crypto_skcipher_type) return sizeof(struct crypto_lskcipher *); return crypto_alg_extsize(alg); } static void crypto_skcipher_free_instance(struct crypto_instance *inst) { struct skcipher_instance *skcipher = container_of(inst, struct skcipher_instance, s.base); skcipher->free(skcipher); } static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct skcipher_alg *skcipher = __crypto_skcipher_alg(alg); seq_printf(m, "type : skcipher\n"); seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? "yes" : "no"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "min keysize : %u\n", skcipher->min_keysize); seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); seq_printf(m, "ivsize : %u\n", skcipher->ivsize); seq_printf(m, "chunksize : %u\n", skcipher->chunksize); seq_printf(m, "walksize : %u\n", skcipher->walksize); seq_printf(m, "statesize : %u\n", skcipher->statesize); } static int __maybe_unused crypto_skcipher_report( struct sk_buff *skb, struct crypto_alg *alg) { struct skcipher_alg *skcipher = __crypto_skcipher_alg(alg); struct crypto_report_blkcipher rblkcipher; memset(&rblkcipher, 0, sizeof(rblkcipher)); strscpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type)); strscpy(rblkcipher.geniv, "<none>", sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = skcipher->min_keysize; rblkcipher.max_keysize = skcipher->max_keysize; rblkcipher.ivsize = skcipher->ivsize; return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, sizeof(rblkcipher), &rblkcipher); } static int __maybe_unused crypto_skcipher_report_stat( struct sk_buff *skb, struct crypto_alg *alg) { struct skcipher_alg *skcipher = __crypto_skcipher_alg(alg); struct crypto_istat_cipher *istat; struct crypto_stat_cipher rcipher; istat = skcipher_get_stat(skcipher); memset(&rcipher, 0, sizeof(rcipher)); strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); } static const struct crypto_type crypto_skcipher_type = { .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, .free = crypto_skcipher_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_skcipher_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_skcipher_report, #endif #ifdef CONFIG_CRYPTO_STATS .report_stat = crypto_skcipher_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_SKCIPHER_MASK, .type = CRYPTO_ALG_TYPE_SKCIPHER, .tfmsize = offsetof(struct crypto_skcipher, base), }; int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_skcipher_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_skcipher); struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_skcipher); struct crypto_sync_skcipher *crypto_alloc_sync_skcipher( const char *alg_name, u32 type, u32 mask) { struct crypto_skcipher *tfm; /* Only sync algorithms allowed. */ mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE; tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask); /* * Make sure we do not allocate something that might get used with * an on-stack request: check the request size. */ if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) > MAX_SYNC_SKCIPHER_REQSIZE)) { crypto_free_skcipher(tfm); return ERR_PTR(-EINVAL); } return (struct crypto_sync_skcipher *)tfm; } EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher); int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_skcipher_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_has_skcipher); int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) { struct crypto_istat_cipher *istat = skcipher_get_stat_common(alg); struct crypto_alg *base = &alg->base; if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || alg->statesize > PAGE_SIZE / 2 || (alg->ivsize + alg->statesize) > PAGE_SIZE / 2) return -EINVAL; if (!alg->chunksize) alg->chunksize = base->cra_blocksize; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; if (IS_ENABLED(CONFIG_CRYPTO_STATS)) memset(istat, 0, sizeof(*istat)); return 0; } static int skcipher_prepare_alg(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; int err; err = skcipher_prepare_alg_common(&alg->co); if (err) return err; if (alg->walksize > PAGE_SIZE / 8) return -EINVAL; if (!alg->walksize) alg->walksize = alg->chunksize; if (!alg->statesize) { alg->import = skcipher_noimport; alg->export = skcipher_noexport; } else if (!(alg->import && alg->export)) return -EINVAL; base->cra_type = &crypto_skcipher_type; base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER; return 0; } int crypto_register_skcipher(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; int err; err = skcipher_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_skcipher); void crypto_unregister_skcipher(struct skcipher_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_skcipher); int crypto_register_skciphers(struct skcipher_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_skcipher(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_skcipher(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_skciphers); void crypto_unregister_skciphers(struct skcipher_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_skcipher(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_skciphers); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = skcipher_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, skcipher_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(skcipher_register_instance); static int skcipher_setkey_simple(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); crypto_cipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(cipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); return crypto_cipher_setkey(cipher, key, keylen); } static int skcipher_init_tfm_simple(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct crypto_cipher_spawn *spawn = skcipher_instance_ctx(inst); struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->cipher = cipher; return 0; } static void skcipher_exit_tfm_simple(struct crypto_skcipher *tfm) { struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm); crypto_free_cipher(ctx->cipher); } static void skcipher_free_instance_simple(struct skcipher_instance *inst) { crypto_drop_cipher(skcipher_instance_ctx(inst)); kfree(inst); } /** * skcipher_alloc_instance_simple - allocate instance of simple block cipher mode * * Allocate an skcipher_instance for a simple block cipher mode of operation, * e.g. cbc or ecb. The instance context will have just a single crypto_spawn, * that for the underlying cipher. The {min,max}_keysize, ivsize, blocksize, * alignmask, and priority are set from the underlying cipher but can be * overridden if needed. The tfm context defaults to skcipher_ctx_simple, and * default ->setkey(), ->init(), and ->exit() methods are installed. * * @tmpl: the template being instantiated * @tb: the template parameters * * Return: a pointer to the new instance, or an ERR_PTR(). The caller still * needs to register the instance. */ struct skcipher_instance *skcipher_alloc_instance_simple( struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct skcipher_instance *inst; struct crypto_cipher_spawn *spawn; struct crypto_alg *cipher_alg; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return ERR_PTR(err); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); spawn = skcipher_instance_ctx(inst); err = crypto_grab_cipher(spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; cipher_alg = crypto_spawn_cipher_alg(spawn); err = crypto_inst_setname(skcipher_crypto_instance(inst), tmpl->name, cipher_alg); if (err) goto err_free_inst; inst->free = skcipher_free_instance_simple; /* Default algorithm properties, can be overridden */ inst->alg.base.cra_blocksize = cipher_alg->cra_blocksize; inst->alg.base.cra_alignmask = cipher_alg->cra_alignmask; inst->alg.base.cra_priority = cipher_alg->cra_priority; inst->alg.min_keysize = cipher_alg->cra_cipher.cia_min_keysize; inst->alg.max_keysize = cipher_alg->cra_cipher.cia_max_keysize; inst->alg.ivsize = cipher_alg->cra_blocksize; /* Use skcipher_ctx_simple by default, can be overridden */ inst->alg.base.cra_ctxsize = sizeof(struct skcipher_ctx_simple); inst->alg.setkey = skcipher_setkey_simple; inst->alg.init = skcipher_init_tfm_simple; inst->alg.exit = skcipher_exit_tfm_simple; return inst; err_free_inst: skcipher_free_instance_simple(inst); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skcipher_alloc_instance_simple); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Symmetric key cipher type"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); |
16 11 11 1 25 25 3 409 43 461 461 25 419 423 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/fsync.c * * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) * from * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * from * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds * * ext4fs fsync primitive * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * * Removed unnecessary code duplication for little endian machines * and excessive __inline__s. * Andi Kleen, 1997 * * Major simplications and cleanup - we only need to do the metadata, because * we can depend on generic_block_fdatasync() to sync the data blocks. */ #include <linux/time.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> #include "ext4.h" #include "ext4_jbd2.h" #include <trace/events/ext4.h> /* * If we're not journaling and this is a just-created file, we have to * sync our parent directory (if it was freshly created) since * otherwise it will only be written by writeback, leaving a huge * window during which a crash may lose the file. This may apply for * the parent directory's parent as well, and so on recursively, if * they are also freshly created. */ static int ext4_sync_parent(struct inode *inode) { struct dentry *dentry, *next; int ret = 0; if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) return 0; dentry = d_find_any_alias(inode); if (!dentry) return 0; while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); next = dget_parent(dentry); dput(dentry); dentry = next; inode = dentry->d_inode; /* * The directory inode may have gone through rmdir by now. But * the inode itself and its blocks are still allocated (we hold * a reference to the inode via its dentry), so it didn't go * through ext4_evict_inode()) and so we are safe to flush * metadata blocks and the inode. */ ret = sync_mapping_buffers(inode->i_mapping); if (ret) break; ret = sync_inode_metadata(inode, 1); if (ret) break; } dput(dentry); return ret; } static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true; return ret; } static int ext4_fsync_journal(struct inode *inode, bool datasync, bool *needs_barrier) { struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; /* * Fastcommit does not really support fsync on directories or other * special files. Force a full commit. */ if (!S_ISREG(inode->i_mode)) return ext4_force_commit(inode->i_sb); if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) *needs_barrier = true; return ext4_fc_commit(journal, commit_tid); } /* * akpm: A new design for ext4_sync_file(). * * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). * There cannot be a transaction open by this task. * Another task could have dirtied this inode. Its data can be in any * state in the journalling system. * * What we do is just kick off a commit and wait on it. This will snapshot the * inode to disk. */ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { int ret = 0, err; bool needs_barrier = false; struct inode *inode = file->f_mapping->host; if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file_enter(file, datasync); if (sb_rdonly(inode->i_sb)) { /* Make sure that we read updated s_ext4_flags value */ smp_rmb(); if (ext4_forced_shutdown(inode->i_sb)) ret = -EROFS; goto out; } if (!EXT4_SB(inode->i_sb)->s_journal) { ret = ext4_fsync_nojournal(file, start, end, datasync, &needs_barrier); if (needs_barrier) goto issue_flush; goto out; } ret = file_write_and_wait_range(file, start, end); if (ret) goto out; /* * The caller's filemap_fdatawrite()/wait will sync the data. * Metadata is in the journal, we wait for proper transaction to * commit here. */ ret = ext4_fsync_journal(inode, datasync, &needs_barrier); issue_flush: if (needs_barrier) { err = blkdev_issue_flush(inode->i_sb->s_bdev); if (!ret) ret = err; } out: err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; trace_ext4_sync_file_exit(inode, ret); return ret; } |
26 17 143 38 105 4 105 17 11 14 14 13 13 13 3 1 2 2 1 1 1 1 10 5 16 1 15 1 13 12 12 12 12 12 12 12 12 11 13 2 2 13 2 2 9 7 4 3 41 40 2 34 7 3 8 1 1 4 5 5 6 3 3 3 10 1 5 2 2 5 2 3 1 4 15 1 1 6 2 5 14 6 2 2 2 14 4 10 10 5 1 4 2 1 1 1 5 5 6 1 2 3 2 2 3 1 4 2 2 4 1 12 4 21 3 1 17 18 4 3 1 1 15 3 9 8 1 1 1 1 5 2 3 13 1 11 3 1 2 6 1 1 2 3 3 3 3 3 3 6 209 1 1 1 2 2 1 204 11 11 5 5 2 2 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 | /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/completion.h> #include <linux/file.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/idr.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/module.h> #include <linux/nsproxy.h> #include <linux/nospec.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> #include <rdma/ib_cm.h> #include <rdma/rdma_netlink.h> #include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); static unsigned int max_backlog = 1024; static struct ctl_table_header *ucma_ctl_table_hdr; static struct ctl_table ucma_ctl_table[] = { { .procname = "max_backlog", .data = &max_backlog, .maxlen = sizeof max_backlog, .mode = 0644, .proc_handler = proc_dointvec, }, }; struct ucma_file { struct mutex mut; struct file *filp; struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; }; struct ucma_context { u32 id; struct completion comp; refcount_t ref; int events_reported; atomic_t backlog; struct ucma_file *file; struct rdma_cm_id *cm_id; struct mutex mutex; u64 uid; struct list_head list; struct list_head mc_list; struct work_struct close_work; }; struct ucma_multicast { struct ucma_context *ctx; u32 id; int events_reported; u64 uid; u8 join_state; struct list_head list; struct sockaddr_storage addr; }; struct ucma_event { struct ucma_context *ctx; struct ucma_context *conn_req_ctx; struct ucma_multicast *mc; struct list_head list; struct rdma_ucm_event_resp resp; }; static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; static int ucma_destroy_private_ctx(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { struct ucma_context *ctx; ctx = xa_load(&ctx_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); return ctx; } static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) { struct ucma_context *ctx; xa_lock(&ctx_table); ctx = _ucma_find_context(id, file); if (!IS_ERR(ctx)) if (!refcount_inc_not_zero(&ctx->ref)) ctx = ERR_PTR(-ENXIO); xa_unlock(&ctx_table); return ctx; } static void ucma_put_ctx(struct ucma_context *ctx) { if (refcount_dec_and_test(&ctx->ref)) complete(&ctx->comp); } /* * Same as ucm_get_ctx but requires that ->cm_id->device is valid, eg that the * CM_ID is bound. */ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id) { struct ucma_context *ctx = ucma_get_ctx(file, id); if (IS_ERR(ctx)) return ctx; if (!ctx->cm_id->device) { ucma_put_ctx(ctx); return ERR_PTR(-EINVAL); } return ctx; } static void ucma_close_id(struct work_struct *work) { struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing * by its creator. This puts back the xarray's reference. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); /* Reading the cm_id without holding a positive ref is not allowed */ ctx->cm_id = NULL; } static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); ctx->file = file; mutex_init(&ctx->mutex); if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) { kfree(ctx); return NULL; } return ctx; } static void ucma_set_ctx_cm_id(struct ucma_context *ctx, struct rdma_cm_id *cm_id) { refcount_set(&ctx->ref, 1); ctx->cm_id = cm_id; } static void ucma_finish_ctx(struct ucma_context *ctx) { lockdep_assert_held(&ctx->file->mut); list_add_tail(&ctx->list, &ctx->file->ctx_list); xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL); } static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, struct rdma_conn_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; dst->responder_resources = src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; } static void ucma_copy_ud_event(struct ib_device *device, struct rdma_ucm_ud_param *dst, struct rdma_ud_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); dst->qp_num = src->qp_num; dst->qkey = src->qkey; } static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, struct rdma_cm_event *event) { struct ucma_event *uevent; uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); if (!uevent) return NULL; uevent->ctx = ctx; switch (event->event) { case RDMA_CM_EVENT_MULTICAST_JOIN: case RDMA_CM_EVENT_MULTICAST_ERROR: uevent->mc = (struct ucma_multicast *) event->param.ud.private_data; uevent->resp.uid = uevent->mc->uid; uevent->resp.id = uevent->mc->id; break; default: uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; break; } uevent->resp.event = event->event; uevent->resp.status = event->status; if (ctx->cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); uevent->resp.ece.vendor_id = event->ece.vendor_id; uevent->resp.ece.attr_mod = event->ece.attr_mod; return uevent; } static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct ucma_context *listen_ctx = cm_id->context; struct ucma_context *ctx; struct ucma_event *uevent; if (!atomic_add_unless(&listen_ctx->backlog, -1, 0)) return -ENOMEM; ctx = ucma_alloc_ctx(listen_ctx->file); if (!ctx) goto err_backlog; ucma_set_ctx_cm_id(ctx, cm_id); uevent = ucma_create_uevent(listen_ctx, event); if (!uevent) goto err_alloc; uevent->conn_req_ctx = ctx; uevent->resp.id = ctx->id; ctx->cm_id->context = ctx; mutex_lock(&ctx->file->mut); ucma_finish_ctx(ctx); list_add_tail(&uevent->list, &ctx->file->event_list); mutex_unlock(&ctx->file->mut); wake_up_interruptible(&ctx->file->poll_wait); return 0; err_alloc: ucma_destroy_private_ctx(ctx); err_backlog: atomic_inc(&listen_ctx->backlog); /* Returning error causes the new ID to be destroyed */ return -ENOMEM; } static int ucma_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct ucma_event *uevent; struct ucma_context *ctx = cm_id->context; if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) return ucma_connect_event_handler(cm_id, event); /* * We ignore events for new connections until userspace has set their * context. This can only happen if an error occurs on a new connection * before the user accepts it. This is okay, since the accept will just * fail later. However, we do need to release the underlying HW * resources in case of a device removal event. */ if (ctx->uid) { uevent = ucma_create_uevent(ctx, event); if (!uevent) return 0; mutex_lock(&ctx->file->mut); list_add_tail(&uevent->list, &ctx->file->event_list); mutex_unlock(&ctx->file->mut); wake_up_interruptible(&ctx->file->poll_wait); } if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { xa_lock(&ctx_table); if (xa_load(&ctx_table, ctx->id) == ctx) queue_work(system_unbound_wq, &ctx->close_work); xa_unlock(&ctx_table); } return 0; } static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_get_event cmd; struct ucma_event *uevent; /* * Old 32 bit user space does not send the 4 byte padding in the * reserved field. We don't care, allow it to keep working. */ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - sizeof(uevent->resp.ece)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->mut); while (list_empty(&file->event_list)) { mutex_unlock(&file->mut); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; mutex_lock(&file->mut); } uevent = list_first_entry(&file->event_list, struct ucma_event, list); if (copy_to_user(u64_to_user_ptr(cmd.response), &uevent->resp, min_t(size_t, out_len, sizeof(uevent->resp)))) { mutex_unlock(&file->mut); return -EFAULT; } list_del(&uevent->list); uevent->ctx->events_reported++; if (uevent->mc) uevent->mc->events_reported++; if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) atomic_inc(&uevent->ctx->backlog); mutex_unlock(&file->mut); kfree(uevent); return 0; } static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) { switch (cmd->ps) { case RDMA_PS_TCP: *qp_type = IB_QPT_RC; return 0; case RDMA_PS_UDP: case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; case RDMA_PS_IB: *qp_type = cmd->qp_type; return 0; default: return -EINVAL; } } static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct rdma_cm_id *cm_id; enum ib_qp_type qp_type; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ret = ucma_get_qp_type(&cmd, &qp_type); if (ret) return ret; ctx = ucma_alloc_ctx(file); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); goto err1; } ucma_set_ctx_cm_id(ctx, cm_id); resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { ret = -EFAULT; goto err1; } mutex_lock(&file->mut); ucma_finish_ctx(ctx); mutex_unlock(&file->mut); return 0; err1: ucma_destroy_private_ctx(ctx); return ret; } static void ucma_cleanup_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc, *tmp; xa_lock(&multicast_table); list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { list_del(&mc->list); /* * At this point mc->ctx->ref is 0 so the mc cannot leave the * lock on the reader and this is enough serialization */ __xa_erase(&multicast_table, mc->id); kfree(mc); } xa_unlock(&multicast_table); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; rdma_lock_handler(mc->ctx->cm_id); mutex_lock(&mc->ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { if (uevent->mc != mc) continue; list_del(&uevent->list); kfree(uevent); } mutex_unlock(&mc->ctx->file->mut); rdma_unlock_handler(mc->ctx->cm_id); } static int ucma_cleanup_ctx_events(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); /* Cleanup events not yet reported to the user.*/ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { if (uevent->ctx != ctx) continue; if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, uevent->conn_req_ctx, XA_ZERO_ENTRY, GFP_KERNEL) == uevent->conn_req_ctx) { list_move_tail(&uevent->list, &list); continue; } list_del(&uevent->list); kfree(uevent); } list_del(&ctx->list); events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); /* * If this was a listening ID then any connections spawned from it that * have not been delivered to userspace are cleaned up too. Must be done * outside any locks. */ list_for_each_entry_safe(uevent, tmp, &list, list) { ucma_destroy_private_ctx(uevent->conn_req_ctx); kfree(uevent); } return events_reported; } /* * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie * the ctx is not public to the user). This either because: * - ucma_finish_ctx() hasn't been called * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) */ static int ucma_destroy_private_ctx(struct ucma_context *ctx) { int events_reported; /* * Destroy the underlying cm_id. New work queuing is prevented now by * the removal from the xarray. Once the work is cancled ref will either * be 0 because the work ran to completion and consumed the ref from the * xarray, or it will be positive because we still have the ref from the * xarray. This can also be 0 in cases where cm_id was never set */ cancel_work_sync(&ctx->close_work); if (refcount_read(&ctx->ref)) ucma_close_id(&ctx->close_work); events_reported = ucma_cleanup_ctx_events(ctx); ucma_cleanup_multicast(ctx); WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, GFP_KERNEL) != NULL); mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_context *ctx; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); if (!IS_ERR(ctx)) { if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, GFP_KERNEL) != ctx) ctx = ERR_PTR(-ENOENT); } xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.events_reported = ucma_destroy_private_ctx(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (!rdma_addr_size_in6(&cmd.addr)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (cmd.reserved || !cmd.addr_size || cmd.addr_size != rdma_addr_size_kss(&cmd.addr)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || !rdma_addr_size_in6(&cmd.dst_addr)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_addr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) || !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_route cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; resp->num_paths = route->num_pri_alt_paths; switch (route->num_pri_alt_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); fallthrough; case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { resp->num_paths = route->num_pri_alt_paths; switch (route->num_pri_alt_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, (union ib_gid *)&resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); fallthrough; case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); } static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct rdma_ucm_query_route_resp resp; struct ucma_context *ctx; struct sockaddr *addr; int ret = 0; if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (!ctx->cm_id->device) goto out; resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.ibdev_index = ctx->cm_id->device->index; resp.port_num = ctx->cm_id->port_num; if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_ib_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iboe_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: mutex_unlock(&ctx->mutex); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; ucma_put_ctx(ctx); return ret; } static void ucma_query_device_addr(struct rdma_cm_id *cm_id, struct rdma_ucm_query_addr_resp *resp) { if (!cm_id->device) return; resp->node_guid = (__force __u64) cm_id->device->node_guid; resp->ibdev_index = cm_id->device->index; resp->port_num = cm_id->port_num; resp->pkey = (__force __u16) cpu_to_be16( ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); } static ssize_t ucma_query_addr(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr *addr; int ret = 0; if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) return -ENOSPC; memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; resp.src_size = rdma_addr_size(addr); memcpy(&resp.src_addr, addr, resp.src_size); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; resp.dst_size = rdma_addr_size(addr); memcpy(&resp.dst_addr, addr, resp.dst_size); ucma_query_device_addr(ctx->cm_id, &resp); if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; return ret; } static ssize_t ucma_query_path(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_path_resp *resp; int i, ret = 0; if (out_len < sizeof(*resp)) return -ENOSPC; resp = kzalloc(out_len, GFP_KERNEL); if (!resp) return -ENOMEM; resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i]; resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL; if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { struct sa_path_rec ib; sa_convert_path_opa_to_ib(&ib, rec); ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); } else { ib_sa_pack_path(rec, &resp->path_data[i].path_rec); } } if (copy_to_user(response, resp, struct_size(resp, path_data, i))) ret = -EFAULT; kfree(resp); return ret; } static ssize_t ucma_query_gid(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr_ib *addr; int ret = 0; if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index)) return -ENOSPC; memset(&resp, 0, sizeof resp); ucma_query_device_addr(ctx->cm_id, &resp); addr = (struct sockaddr_ib *) &resp.src_addr; resp.src_size = sizeof(*addr); if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr, NULL); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.src_addr); } addr = (struct sockaddr_ib *) &resp.dst_addr; resp.dst_size = sizeof(*addr); if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_read_gids(ctx->cm_id, NULL, (union ib_gid *)&addr->sib_addr); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr); } if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp)))) ret = -EFAULT; return ret; } static ssize_t ucma_query(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct ucma_context *ctx; void __user *response; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; response = u64_to_user_ptr(cmd.response); ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_PATH: ret = ucma_query_path(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_GID: ret = ucma_query_gid(ctx, response, out_len); break; default: ret = -ENOSYS; break; } mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static void ucma_copy_conn_param(struct rdma_cm_id *id, struct rdma_conn_param *dst, struct rdma_ucm_conn_param *src) { dst->private_data = src->private_data; dst->private_data_len = src->private_data_len; dst->responder_resources = src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num & 0xFFFFFF; dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_conn_param conn_param; struct rdma_ucm_ece ece = {}; struct rdma_ucm_connect cmd; struct ucma_context *ctx; size_t in_size; int ret; if (in_len < offsetofend(typeof(cmd), reserved)) return -EINVAL; in_size = min_t(size_t, in_len, sizeof(cmd)); if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; if (!cmd.conn_param.valid) return -EINVAL; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); if (offsetofend(typeof(cmd), ece) <= in_size) { ece.vendor_id = cmd.ece.vendor_id; ece.attr_mod = cmd.ece.attr_mod; } mutex_lock(&ctx->mutex); ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_listen cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (cmd.backlog <= 0 || cmd.backlog > max_backlog) cmd.backlog = max_backlog; atomic_set(&ctx->backlog, cmd.backlog); mutex_lock(&ctx->mutex); ret = rdma_listen(ctx->cm_id, cmd.backlog); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_accept cmd; struct rdma_conn_param conn_param; struct rdma_ucm_ece ece = {}; struct ucma_context *ctx; size_t in_size; int ret; if (in_len < offsetofend(typeof(cmd), reserved)) return -EINVAL; in_size = min_t(size_t, in_len, sizeof(cmd)); if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (offsetofend(typeof(cmd), ece) <= in_size) { ece.vendor_id = cmd.ece.vendor_id; ece.attr_mod = cmd.ece.attr_mod; } if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&ctx->mutex); rdma_lock_handler(ctx->cm_id); ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); if (!ret) { /* The uid must be set atomically with the handler */ ctx->uid = cmd.uid; } rdma_unlock_handler(ctx->cm_id); mutex_unlock(&ctx->mutex); } else { mutex_lock(&ctx->mutex); rdma_lock_handler(ctx->cm_id); ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); rdma_unlock_handler(ctx->cm_id); mutex_unlock(&ctx->mutex); } ucma_put_ctx(ctx); return ret; } static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_reject cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (!cmd.reason) cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; switch (cmd.reason) { case IB_CM_REJ_CONSUMER_DEFINED: case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: break; default: return -EINVAL; } ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, cmd.reason); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_disconnect cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); ret = rdma_disconnect(ctx->cm_id); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_init_qp_attr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_init_qp_attr cmd; struct ib_uverbs_qp_attr resp; struct ucma_context *ctx; struct ib_qp_attr qp_attr; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (cmd.qp_state > IB_QPS_ERR) return -EINVAL; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; mutex_lock(&ctx->mutex); ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); mutex_unlock(&ctx->mutex); if (ret) goto out; ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; out: ucma_put_ctx(ctx); return ret; } static int ucma_set_option_id(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret = 0; switch (optname) { case RDMA_OPTION_ID_TOS: if (optlen != sizeof(u8)) { ret = -EINVAL; break; } rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); break; case RDMA_OPTION_ID_REUSEADDR: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; case RDMA_OPTION_ID_AFONLY: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); break; case RDMA_OPTION_ID_ACK_TIMEOUT: if (optlen != sizeof(u8)) { ret = -EINVAL; break; } ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval)); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_ib_path(struct ucma_context *ctx, struct ib_path_rec_data *path_data, size_t optlen) { struct sa_path_rec sa_path; struct rdma_cm_event event; int ret; if (optlen % sizeof(*path_data)) return -EINVAL; for (; optlen; optlen -= sizeof(*path_data), path_data++) { if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL)) break; } if (!optlen) return -EINVAL; if (!ctx->cm_id->device) return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); sa_path.rec_type = SA_PATH_REC_TYPE_IB; ib_sa_unpack_path(path_data->path_rec, &sa_path); if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) { struct sa_path_rec opa; sa_convert_path_ib_to_opa(&opa, &sa_path); mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &opa); mutex_unlock(&ctx->mutex); } else { mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &sa_path); mutex_unlock(&ctx->mutex); } if (ret) return ret; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; return ucma_event_handler(ctx->cm_id, &event); } static int ucma_set_option_ib(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret; switch (optname) { case RDMA_OPTION_IB_PATH: ret = ucma_set_ib_path(ctx, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { int ret; switch (level) { case RDMA_OPTION_ID: mutex_lock(&ctx->mutex); ret = ucma_set_option_id(ctx, optname, optval, optlen); mutex_unlock(&ctx->mutex); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_set_option cmd; struct ucma_context *ctx; void *optval; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); optval = memdup_user(u64_to_user_ptr(cmd.optval), cmd.optlen); if (IS_ERR(optval)) { ret = PTR_ERR(optval); goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); kfree(optval); out: ucma_put_ctx(ctx); return ret; } static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_notify cmd; struct ucma_context *ctx; int ret = -EINVAL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); if (ctx->cm_id->device) ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_process_join(struct ucma_file *file, struct rdma_ucm_join_mcast *cmd, int out_len) { struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct ucma_multicast *mc; struct sockaddr *addr; int ret; u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; if (cmd->addr_size != rdma_addr_size(addr)) return -EINVAL; if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) join_state = BIT(FULLMEMBER_JOIN); else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) join_state = BIT(SENDONLY_FULLMEMBER_JOIN); else return -EINVAL; ctx = ucma_get_ctx_dev(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) { ret = -ENOMEM; goto err_put_ctx; } mc->ctx = ctx; mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); xa_lock(&multicast_table); if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) { ret = -ENOMEM; goto err_free_mc; } list_add_tail(&mc->list, &ctx->mc_list); xa_unlock(&multicast_table); mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); mutex_unlock(&ctx->mutex); if (ret) goto err_xa_erase; resp.id = mc->id; if (copy_to_user(u64_to_user_ptr(cmd->response), &resp, sizeof(resp))) { ret = -EFAULT; goto err_leave_multicast; } xa_store(&multicast_table, mc->id, mc, 0); ucma_put_ctx(ctx); return 0; err_leave_multicast: mutex_lock(&ctx->mutex); rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err_xa_erase: xa_lock(&multicast_table); list_del(&mc->list); __xa_erase(&multicast_table, mc->id); err_free_mc: xa_unlock(&multicast_table); kfree(mc); err_put_ctx: ucma_put_ctx(ctx); return ret; } static ssize_t ucma_join_ip_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_ip_mcast cmd; struct rdma_ucm_join_mcast join_cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; join_cmd.response = cmd.response; join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr); if (!join_cmd.addr_size) return -EINVAL; join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); } static ssize_t ucma_join_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_mcast cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (!rdma_addr_size_kss(&cmd.addr)) return -EINVAL; return ucma_process_join(file, &cmd, out_len); } static ssize_t ucma_leave_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_multicast *mc; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; xa_lock(&multicast_table); mc = xa_load(&multicast_table, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); else if (READ_ONCE(mc->ctx->file) != file) mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); if (IS_ERR(mc)) { xa_unlock(&multicast_table); ret = PTR_ERR(mc); goto out; } list_del(&mc->list); __xa_erase(&multicast_table, mc->id); xa_unlock(&multicast_table); mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); ucma_cleanup_mc_events(mc); ucma_put_ctx(mc->ctx); resp.events_reported = mc->events_reported; kfree(mc); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; out: return ret; } static ssize_t ucma_migrate_id(struct ucma_file *new_file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_event *uevent, *tmp; struct ucma_context *ctx; LIST_HEAD(event_list); struct fd f; struct ucma_file *cur_file; int ret = 0; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; /* Get current fd to protect against it being closed */ f = fdget(cmd.fd); if (!f.file) return -ENOENT; if (f.file->f_op != &ucma_fops) { ret = -EINVAL; goto file_put; } cur_file = f.file->private_data; /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(cur_file, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; } rdma_lock_handler(ctx->cm_id); /* * ctx->file can only be changed under the handler & xa_lock. xa_load() * must be checked again to ensure the ctx hasn't begun destruction * since the ucma_get_ctx(). */ xa_lock(&ctx_table); if (_ucma_find_context(cmd.id, cur_file) != ctx) { xa_unlock(&ctx_table); ret = -ENOENT; goto err_unlock; } ctx->file = new_file; xa_unlock(&ctx_table); mutex_lock(&cur_file->mut); list_del(&ctx->list); /* * At this point lock_handler() prevents addition of new uevents for * this ctx. */ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list) if (uevent->ctx == ctx) list_move_tail(&uevent->list, &event_list); resp.events_reported = ctx->events_reported; mutex_unlock(&cur_file->mut); mutex_lock(&new_file->mut); list_add_tail(&ctx->list, &new_file->ctx_list); list_splice_tail(&event_list, &new_file->event_list); mutex_unlock(&new_file->mut); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; err_unlock: rdma_unlock_handler(ctx->cm_id); ucma_put_ctx(ctx); file_put: fdput(f); return ret; } static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, [RDMA_USER_CM_CMD_REJECT] = ucma_reject, [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, [RDMA_USER_CM_CMD_GET_OPTION] = NULL, [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, [RDMA_USER_CM_CMD_QUERY] = ucma_query, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast }; static ssize_t ucma_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ucma_file *file = filp->private_data; struct rdma_ucm_cmd_hdr hdr; ssize_t ret; if (!ib_safe_file_access(filp)) { pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", __func__, task_tgid_vnr(current), current->comm); return -EACCES; } if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; if (!ucma_cmd_table[hdr.cmd]) return -ENOSYS; ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!ret) ret = len; return ret; } static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait) { struct ucma_file *file = filp->private_data; __poll_t mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->event_list)) mask = EPOLLIN | EPOLLRDNORM; return mask; } /* * ucma_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ucma_open(struct inode *inode, struct file *filp) { struct ucma_file *file; file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) return -ENOMEM; INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); mutex_init(&file->mut); filp->private_data = file; file->filp = filp; return stream_open(inode, filp); } static int ucma_close(struct inode *inode, struct file *filp) { struct ucma_file *file = filp->private_data; /* * All paths that touch ctx_list or ctx_list starting from write() are * prevented by this being a FD release function. The list_add_tail() in * ucma_connect_event_handler() can run concurrently, however it only * adds to the list *after* a listening ID. By only reading the first of * the list, and relying on ucma_destroy_private_ctx() to block * ucma_connect_event_handler(), no additional locking is needed. */ while (!list_empty(&file->ctx_list)) { struct ucma_context *ctx = list_first_entry( &file->ctx_list, struct ucma_context, list); WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, GFP_KERNEL) != ctx); ucma_destroy_private_ctx(ctx); } kfree(file); return 0; } static const struct file_operations ucma_fops = { .owner = THIS_MODULE, .open = ucma_open, .release = ucma_close, .write = ucma_write, .poll = ucma_poll, .llseek = no_llseek, }; static struct miscdevice ucma_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "rdma_cm", .nodename = "infiniband/rdma_cm", .mode = 0666, .fops = &ucma_fops, }; static int ucma_get_global_nl_info(struct ib_client_nl_info *res) { res->abi = RDMA_USER_CM_ABI_VERSION; res->cdev = ucma_misc.this_device; return 0; } static struct ib_client rdma_cma_client = { .name = "rdma_cm", .get_global_nl_info = ucma_get_global_nl_info, }; MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); static ssize_t abi_version_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR_RO(abi_version); static int __init ucma_init(void) { int ret; ret = misc_register(&ucma_misc); if (ret) return ret; ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { pr_err("rdma_ucm: couldn't create abi_version attr\n"); goto err1; } ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); if (!ucma_ctl_table_hdr) { pr_err("rdma_ucm: couldn't register sysctl paths\n"); ret = -ENOMEM; goto err2; } ret = ib_register_client(&rdma_cma_client); if (ret) goto err3; return 0; err3: unregister_net_sysctl_table(ucma_ctl_table_hdr); err2: device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); err1: misc_deregister(&ucma_misc); return ret; } static void __exit ucma_cleanup(void) { ib_unregister_client(&rdma_cma_client); unregister_net_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); } module_init(ucma_init); module_exit(ucma_cleanup); |
47 118 19 251 250 66 14 61 156 149 14 13 2 11 71 71 10 61 27 12 16 27 27 53 53 37 16 4 49 13 3 10 3 17 3 35 5 61 13 31 17 36 36 20 16 31 5 17 7 1 6 106 67 39 10 10 9 12 2 3 4 8 5 3 5 3 4 3 5 1 3 3 9 7 14 5 1 3 2 4 9 105 3 2 2 3 14 82 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines * which can be dynamically activated and de-activated by the line * discipline handling modules (like SLIP). */ #include <linux/bits.h> #include <linux/types.h> #include <linux/termios.h> #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/tty.h> #include <linux/fcntl.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/compat.h> #include <linux/termios_internal.h> #include "tty.h" #include <asm/io.h> #include <linux/uaccess.h> #undef DEBUG /* * Internal flag options for termios setting behavior */ #define TERMIOS_FLUSH BIT(0) #define TERMIOS_WAIT BIT(1) #define TERMIOS_TERMIO BIT(2) #define TERMIOS_OLD BIT(3) /** * tty_chars_in_buffer - characters pending * @tty: terminal * * Returns: the number of bytes of data in the device private output queue. If * no private method is supplied there is assumed to be no queue on the device. */ unsigned int tty_chars_in_buffer(struct tty_struct *tty) { if (tty->ops->chars_in_buffer) return tty->ops->chars_in_buffer(tty); return 0; } EXPORT_SYMBOL(tty_chars_in_buffer); /** * tty_write_room - write queue space * @tty: terminal * * Returns: the number of bytes that can be queued to this device at the present * time. The result should be treated as a guarantee and the driver cannot * offer a value it later shrinks by more than the number of bytes written. If * no method is provided, 2K is always returned and data may be lost as there * will be no flow control. */ unsigned int tty_write_room(struct tty_struct *tty) { if (tty->ops->write_room) return tty->ops->write_room(tty); return 2048; } EXPORT_SYMBOL(tty_write_room); /** * tty_driver_flush_buffer - discard internal buffer * @tty: terminal * * Discard the internal output buffer for this device. If no method is provided, * then either the buffer cannot be hardware flushed or there is no buffer * driver side. */ void tty_driver_flush_buffer(struct tty_struct *tty) { if (tty->ops->flush_buffer) tty->ops->flush_buffer(tty); } EXPORT_SYMBOL(tty_driver_flush_buffer); /** * tty_unthrottle - flow control * @tty: terminal * * Indicate that a @tty may continue transmitting data down the stack. Takes * the &tty_struct->termios_rwsem to protect against parallel * throttle/unthrottle and also to ensure the driver can consistently reference * its own termios data at this point when implementing software flow control. * * Drivers should however remember that the stack can issue a throttle, then * change flow control method, then unthrottle. */ void tty_unthrottle(struct tty_struct *tty) { down_write(&tty->termios_rwsem); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); tty->flow_change = TTY_FLOW_NO_CHANGE; up_write(&tty->termios_rwsem); } EXPORT_SYMBOL(tty_unthrottle); /** * tty_throttle_safe - flow control * @tty: terminal * * Indicate that a @tty should stop transmitting data down the stack. * tty_throttle_safe() will only attempt throttle if @tty->flow_change is * %TTY_THROTTLE_SAFE. Prevents an accidental throttle due to race conditions * when throttling is conditional on factors evaluated prior to throttling. * * Returns: %true if @tty is throttled (or was already throttled) */ bool tty_throttle_safe(struct tty_struct *tty) { bool ret = true; mutex_lock(&tty->throttle_mutex); if (!tty_throttled(tty)) { if (tty->flow_change != TTY_THROTTLE_SAFE) ret = false; else { set_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->throttle) tty->ops->throttle(tty); } } mutex_unlock(&tty->throttle_mutex); return ret; } /** * tty_unthrottle_safe - flow control * @tty: terminal * * Similar to tty_unthrottle() but will only attempt unthrottle if * @tty->flow_change is %TTY_UNTHROTTLE_SAFE. Prevents an accidental unthrottle * due to race conditions when unthrottling is conditional on factors evaluated * prior to unthrottling. * * Returns: %true if @tty is unthrottled (or was already unthrottled) */ bool tty_unthrottle_safe(struct tty_struct *tty) { bool ret = true; mutex_lock(&tty->throttle_mutex); if (tty_throttled(tty)) { if (tty->flow_change != TTY_UNTHROTTLE_SAFE) ret = false; else { clear_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->unthrottle) tty->ops->unthrottle(tty); } } mutex_unlock(&tty->throttle_mutex); return ret; } /** * tty_wait_until_sent - wait for I/O to finish * @tty: tty we are waiting for * @timeout: how long we will wait * * Wait for characters pending in a tty driver to hit the wire, or for a * timeout to occur (eg due to flow control). * * Locking: none */ void tty_wait_until_sent(struct tty_struct *tty, long timeout) { if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; timeout = wait_event_interruptible_timeout(tty->write_wait, !tty_chars_in_buffer(tty), timeout); if (timeout <= 0) return; if (timeout == MAX_SCHEDULE_TIMEOUT) timeout = 0; if (tty->ops->wait_until_sent) tty->ops->wait_until_sent(tty, timeout); } EXPORT_SYMBOL(tty_wait_until_sent); /* * Termios Helper Methods */ static void unset_locked_termios(struct tty_struct *tty, const struct ktermios *old) { struct ktermios *termios = &tty->termios; struct ktermios *locked = &tty->termios_locked; int i; #define NOSET_MASK(x, y, z) (x = ((x) & ~(z)) | ((y) & (z))) NOSET_MASK(termios->c_iflag, old->c_iflag, locked->c_iflag); NOSET_MASK(termios->c_oflag, old->c_oflag, locked->c_oflag); NOSET_MASK(termios->c_cflag, old->c_cflag, locked->c_cflag); NOSET_MASK(termios->c_lflag, old->c_lflag, locked->c_lflag); termios->c_line = locked->c_line ? old->c_line : termios->c_line; for (i = 0; i < NCCS; i++) termios->c_cc[i] = locked->c_cc[i] ? old->c_cc[i] : termios->c_cc[i]; /* FIXME: What should we do for i/ospeed */ } /** * tty_termios_copy_hw - copy hardware settings * @new: new termios * @old: old termios * * Propagate the hardware specific terminal setting bits from the @old termios * structure to the @new one. This is used in cases where the hardware does not * support reconfiguration or as a helper in some cases where only minimal * reconfiguration is supported. */ void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old) { /* The bits a dumb device handles in software. Smart devices need to always provide a set_termios method */ new->c_cflag &= HUPCL | CREAD | CLOCAL; new->c_cflag |= old->c_cflag & ~(HUPCL | CREAD | CLOCAL); new->c_ispeed = old->c_ispeed; new->c_ospeed = old->c_ospeed; } EXPORT_SYMBOL(tty_termios_copy_hw); /** * tty_termios_hw_change - check for setting change * @a: termios * @b: termios to compare * * Check if any of the bits that affect a dumb device have changed between the * two termios structures, or a speed change is needed. * * Returns: %true if change is needed */ bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b) { if (a->c_ispeed != b->c_ispeed || a->c_ospeed != b->c_ospeed) return true; if ((a->c_cflag ^ b->c_cflag) & ~(HUPCL | CREAD | CLOCAL)) return true; return false; } EXPORT_SYMBOL(tty_termios_hw_change); /** * tty_get_char_size - get size of a character * @cflag: termios cflag value * * Returns: size (in bits) of a character depending on @cflag's %CSIZE setting */ unsigned char tty_get_char_size(unsigned int cflag) { switch (cflag & CSIZE) { case CS5: return 5; case CS6: return 6; case CS7: return 7; case CS8: default: return 8; } } EXPORT_SYMBOL_GPL(tty_get_char_size); /** * tty_get_frame_size - get size of a frame * @cflag: termios cflag value * * Get the size (in bits) of a frame depending on @cflag's %CSIZE, %CSTOPB, and * %PARENB setting. The result is a sum of character size, start and stop bits * -- one bit each -- second stop bit (if set), and parity bit (if set). * * Returns: size (in bits) of a frame depending on @cflag's setting. */ unsigned char tty_get_frame_size(unsigned int cflag) { unsigned char bits = 2 + tty_get_char_size(cflag); if (cflag & CSTOPB) bits++; if (cflag & PARENB) bits++; if (cflag & ADDRB) bits++; return bits; } EXPORT_SYMBOL_GPL(tty_get_frame_size); /** * tty_set_termios - update termios values * @tty: tty to update * @new_termios: desired new value * * Perform updates to the termios values set on this @tty. A master pty's * termios should never be set. * * Locking: &tty_struct->termios_rwsem */ int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) { struct ktermios old_termios; struct tty_ldisc *ld; WARN_ON(tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); /* * Perform the actual termios internal changes under lock. */ /* FIXME: we need to decide on some locking/ordering semantics for the set_termios notification eventually */ down_write(&tty->termios_rwsem); old_termios = tty->termios; tty->termios = *new_termios; unset_locked_termios(tty, &old_termios); /* Reset any ADDRB changes, ADDRB is changed through ->rs485_config() */ tty->termios.c_cflag ^= (tty->termios.c_cflag ^ old_termios.c_cflag) & ADDRB; if (tty->ops->set_termios) tty->ops->set_termios(tty, &old_termios); else tty_termios_copy_hw(&tty->termios, &old_termios); ld = tty_ldisc_ref(tty); if (ld != NULL) { if (ld->ops->set_termios) ld->ops->set_termios(tty, &old_termios); tty_ldisc_deref(ld); } up_write(&tty->termios_rwsem); return 0; } EXPORT_SYMBOL_GPL(tty_set_termios); /* * Translate a "termio" structure into a "termios". Ugh. */ __weak int user_termio_to_kernel_termios(struct ktermios *termios, struct termio __user *termio) { struct termio v; if (copy_from_user(&v, termio, sizeof(struct termio))) return -EFAULT; termios->c_iflag = (0xffff0000 & termios->c_iflag) | v.c_iflag; termios->c_oflag = (0xffff0000 & termios->c_oflag) | v.c_oflag; termios->c_cflag = (0xffff0000 & termios->c_cflag) | v.c_cflag; termios->c_lflag = (0xffff0000 & termios->c_lflag) | v.c_lflag; termios->c_line = (0xffff0000 & termios->c_lflag) | v.c_line; memcpy(termios->c_cc, v.c_cc, NCC); return 0; } /* * Translate a "termios" structure into a "termio". Ugh. */ __weak int kernel_termios_to_user_termio(struct termio __user *termio, struct ktermios *termios) { struct termio v; memset(&v, 0, sizeof(struct termio)); v.c_iflag = termios->c_iflag; v.c_oflag = termios->c_oflag; v.c_cflag = termios->c_cflag; v.c_lflag = termios->c_lflag; v.c_line = termios->c_line; memcpy(v.c_cc, termios->c_cc, NCC); return copy_to_user(termio, &v, sizeof(struct termio)); } #ifdef TCGETS2 __weak int user_termios_to_kernel_termios(struct ktermios *k, struct termios2 __user *u) { return copy_from_user(k, u, sizeof(struct termios2)); } __weak int kernel_termios_to_user_termios(struct termios2 __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios2)); } __weak int user_termios_to_kernel_termios_1(struct ktermios *k, struct termios __user *u) { return copy_from_user(k, u, sizeof(struct termios)); } __weak int kernel_termios_to_user_termios_1(struct termios __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios)); } #else __weak int user_termios_to_kernel_termios(struct ktermios *k, struct termios __user *u) { return copy_from_user(k, u, sizeof(struct termios)); } __weak int kernel_termios_to_user_termios(struct termios __user *u, struct ktermios *k) { return copy_to_user(u, k, sizeof(struct termios)); } #endif /* TCGETS2 */ /** * set_termios - set termios values for a tty * @tty: terminal device * @arg: user data * @opt: option information * * Helper function to prepare termios data and run necessary other functions * before using tty_set_termios() to do the actual changes. * * Locking: called functions take &tty_struct->ldisc_sem and * &tty_struct->termios_rwsem locks * * Returns: 0 on success, an error otherwise */ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) { struct ktermios tmp_termios; struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; down_read(&tty->termios_rwsem); tmp_termios = tty->termios; up_read(&tty->termios_rwsem); if (opt & TERMIOS_TERMIO) { if (user_termio_to_kernel_termios(&tmp_termios, (struct termio __user *)arg)) return -EFAULT; #ifdef TCGETS2 } else if (opt & TERMIOS_OLD) { if (user_termios_to_kernel_termios_1(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; } else { if (user_termios_to_kernel_termios(&tmp_termios, (struct termios2 __user *)arg)) return -EFAULT; } #else } else if (user_termios_to_kernel_termios(&tmp_termios, (struct termios __user *)arg)) return -EFAULT; #endif /* If old style Bfoo values are used then load c_ispeed/c_ospeed * with the real speed so its unconditionally usable */ tmp_termios.c_ispeed = tty_termios_input_baud_rate(&tmp_termios); tmp_termios.c_ospeed = tty_termios_baud_rate(&tmp_termios); if (opt & (TERMIOS_FLUSH|TERMIOS_WAIT)) { retry_write_wait: retval = wait_event_interruptible(tty->write_wait, !tty_chars_in_buffer(tty)); if (retval < 0) return retval; if (tty_write_lock(tty, false) < 0) goto retry_write_wait; /* Racing writer? */ if (tty_chars_in_buffer(tty)) { tty_write_unlock(tty); goto retry_write_wait; } ld = tty_ldisc_ref(tty); if (ld != NULL) { if ((opt & TERMIOS_FLUSH) && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_ldisc_deref(ld); } if ((opt & TERMIOS_WAIT) && tty->ops->wait_until_sent) { tty->ops->wait_until_sent(tty, 0); if (signal_pending(current)) { tty_write_unlock(tty); return -ERESTARTSYS; } } tty_set_termios(tty, &tmp_termios); tty_write_unlock(tty); } else { tty_set_termios(tty, &tmp_termios); } /* FIXME: Arguably if tmp_termios == tty->termios AND the actual requested termios was not tmp_termios then we may want to return an error as no user requested change has succeeded */ return 0; } static void copy_termios(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios; up_read(&tty->termios_rwsem); } static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm) { down_read(&tty->termios_rwsem); *kterm = tty->termios_locked; up_read(&tty->termios_rwsem); } static int get_termio(struct tty_struct *tty, struct termio __user *termio) { struct ktermios kterm; copy_termios(tty, &kterm); if (kernel_termios_to_user_termio(termio, &kterm)) return -EFAULT; return 0; } #ifdef TIOCGETP /* * These are deprecated, but there is limited support.. * * The "sg_flags" translation is a joke.. */ static int get_sgflags(struct tty_struct *tty) { int flags = 0; if (!L_ICANON(tty)) { if (L_ISIG(tty)) flags |= 0x02; /* cbreak */ else flags |= 0x20; /* raw */ } if (L_ECHO(tty)) flags |= 0x08; /* echo */ if (O_OPOST(tty)) if (O_ONLCR(tty)) flags |= 0x10; /* crmod */ return flags; } static int get_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { struct sgttyb tmp; down_read(&tty->termios_rwsem); tmp.sg_ispeed = tty->termios.c_ispeed; tmp.sg_ospeed = tty->termios.c_ospeed; tmp.sg_erase = tty->termios.c_cc[VERASE]; tmp.sg_kill = tty->termios.c_cc[VKILL]; tmp.sg_flags = get_sgflags(tty); up_read(&tty->termios_rwsem); return copy_to_user(sgttyb, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static void set_sgflags(struct ktermios *termios, int flags) { termios->c_iflag = ICRNL | IXON; termios->c_oflag = 0; termios->c_lflag = ISIG | ICANON; if (flags & 0x02) { /* cbreak */ termios->c_iflag = 0; termios->c_lflag &= ~ICANON; } if (flags & 0x08) { /* echo */ termios->c_lflag |= ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN; } if (flags & 0x10) { /* crmod */ termios->c_oflag |= OPOST | ONLCR; } if (flags & 0x20) { /* raw */ termios->c_iflag = 0; termios->c_lflag &= ~(ISIG | ICANON); } if (!(termios->c_lflag & ICANON)) { termios->c_cc[VMIN] = 1; termios->c_cc[VTIME] = 0; } } /** * set_sgttyb - set legacy terminal values * @tty: tty structure * @sgttyb: pointer to old style terminal structure * * Updates a terminal from the legacy BSD style terminal information structure. * * Locking: &tty_struct->termios_rwsem * * Returns: 0 on success, an error otherwise */ static int set_sgttyb(struct tty_struct *tty, struct sgttyb __user *sgttyb) { int retval; struct sgttyb tmp; struct ktermios termios; retval = tty_check_change(tty); if (retval) return retval; if (copy_from_user(&tmp, sgttyb, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); termios = tty->termios; termios.c_cc[VERASE] = tmp.sg_erase; termios.c_cc[VKILL] = tmp.sg_kill; set_sgflags(&termios, tmp.sg_flags); /* Try and encode into Bfoo format */ tty_termios_encode_baud_rate(&termios, termios.c_ispeed, termios.c_ospeed); up_write(&tty->termios_rwsem); tty_set_termios(tty, &termios); return 0; } #endif #ifdef TIOCGETC static int get_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; down_read(&tty->termios_rwsem); tmp.t_intrc = tty->termios.c_cc[VINTR]; tmp.t_quitc = tty->termios.c_cc[VQUIT]; tmp.t_startc = tty->termios.c_cc[VSTART]; tmp.t_stopc = tty->termios.c_cc[VSTOP]; tmp.t_eofc = tty->termios.c_cc[VEOF]; tmp.t_brkc = tty->termios.c_cc[VEOL2]; /* what is brkc anyway? */ up_read(&tty->termios_rwsem); return copy_to_user(tchars, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static int set_tchars(struct tty_struct *tty, struct tchars __user *tchars) { struct tchars tmp; if (copy_from_user(&tmp, tchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VINTR] = tmp.t_intrc; tty->termios.c_cc[VQUIT] = tmp.t_quitc; tty->termios.c_cc[VSTART] = tmp.t_startc; tty->termios.c_cc[VSTOP] = tmp.t_stopc; tty->termios.c_cc[VEOF] = tmp.t_eofc; tty->termios.c_cc[VEOL2] = tmp.t_brkc; /* what is brkc anyway? */ up_write(&tty->termios_rwsem); return 0; } #endif #ifdef TIOCGLTC static int get_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; down_read(&tty->termios_rwsem); tmp.t_suspc = tty->termios.c_cc[VSUSP]; /* what is dsuspc anyway? */ tmp.t_dsuspc = tty->termios.c_cc[VSUSP]; tmp.t_rprntc = tty->termios.c_cc[VREPRINT]; /* what is flushc anyway? */ tmp.t_flushc = tty->termios.c_cc[VEOL2]; tmp.t_werasc = tty->termios.c_cc[VWERASE]; tmp.t_lnextc = tty->termios.c_cc[VLNEXT]; up_read(&tty->termios_rwsem); return copy_to_user(ltchars, &tmp, sizeof(tmp)) ? -EFAULT : 0; } static int set_ltchars(struct tty_struct *tty, struct ltchars __user *ltchars) { struct ltchars tmp; if (copy_from_user(&tmp, ltchars, sizeof(tmp))) return -EFAULT; down_write(&tty->termios_rwsem); tty->termios.c_cc[VSUSP] = tmp.t_suspc; /* what is dsuspc anyway? */ tty->termios.c_cc[VEOL2] = tmp.t_dsuspc; tty->termios.c_cc[VREPRINT] = tmp.t_rprntc; /* what is flushc anyway? */ tty->termios.c_cc[VEOL2] = tmp.t_flushc; tty->termios.c_cc[VWERASE] = tmp.t_werasc; tty->termios.c_cc[VLNEXT] = tmp.t_lnextc; up_write(&tty->termios_rwsem); return 0; } #endif /** * tty_change_softcar - carrier change ioctl helper * @tty: tty to update * @enable: enable/disable %CLOCAL * * Perform a change to the %CLOCAL state and call into the driver layer to make * it visible. * * Locking: &tty_struct->termios_rwsem. * * Returns: 0 on success, an error otherwise */ static int tty_change_softcar(struct tty_struct *tty, bool enable) { int ret = 0; struct ktermios old; tcflag_t bit = enable ? CLOCAL : 0; down_write(&tty->termios_rwsem); old = tty->termios; tty->termios.c_cflag &= ~CLOCAL; tty->termios.c_cflag |= bit; if (tty->ops->set_termios) tty->ops->set_termios(tty, &old); if (C_CLOCAL(tty) != bit) ret = -EINVAL; up_write(&tty->termios_rwsem); return ret; } /** * tty_mode_ioctl - mode related ioctls * @tty: tty for the ioctl * @cmd: command * @arg: ioctl argument * * Perform non-line discipline specific mode control ioctls. This is designed * to be called by line disciplines to ensure they provide consistent mode * setting. */ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct tty_struct *real_tty; void __user *p = (void __user *)arg; int ret = 0; struct ktermios kterm; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) real_tty = tty->link; else real_tty = tty; switch (cmd) { #ifdef TIOCGETP case TIOCGETP: return get_sgttyb(real_tty, (struct sgttyb __user *) arg); case TIOCSETP: case TIOCSETN: return set_sgttyb(real_tty, (struct sgttyb __user *) arg); #endif #ifdef TIOCGETC case TIOCGETC: return get_tchars(real_tty, p); case TIOCSETC: return set_tchars(real_tty, p); #endif #ifdef TIOCGLTC case TIOCGLTC: return get_ltchars(real_tty, p); case TIOCSLTC: return set_ltchars(real_tty, p); #endif case TCSETSF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_OLD); case TCSETSW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_OLD); case TCSETS: return set_termios(real_tty, p, TERMIOS_OLD); #ifndef TCGETS2 case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; #else case TCGETS: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCGETS2: copy_termios(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm)) ret = -EFAULT; return ret; case TCSETSF2: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); case TCSETSW2: return set_termios(real_tty, p, TERMIOS_WAIT); case TCSETS2: return set_termios(real_tty, p, 0); #endif case TCGETA: return get_termio(real_tty, p); case TCSETAF: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETAW: return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETA: return set_termios(real_tty, p, TERMIOS_TERMIO); #ifndef TCGETS2 case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; up_write(&real_tty->termios_rwsem); return 0; #else case TIOCGLCKTRMIOS: copy_termios_locked(real_tty, &kterm); if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios_1(&kterm, (struct termios __user *) arg)) return -EFAULT; down_write(&real_tty->termios_rwsem); real_tty->termios_locked = kterm; up_write(&real_tty->termios_rwsem); return ret; #endif #ifdef TCGETX case TCGETX: case TCSETX: case TCSETXW: case TCSETXF: return -ENOTTY; #endif case TIOCGSOFTCAR: copy_termios(real_tty, &kterm); ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0, (int __user *)arg); return ret; case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) return -EFAULT; return tty_change_softcar(real_tty, arg); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(tty_mode_ioctl); /* Caller guarantees ldisc reference is held */ static int __tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld = tty->ldisc; switch (arg) { case TCIFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } break; case TCIOFLUSH: if (ld && ld->ops->flush_buffer) { ld->ops->flush_buffer(tty); tty_unthrottle(tty); } fallthrough; case TCOFLUSH: tty_driver_flush_buffer(tty); break; default: return -EINVAL; } return 0; } int tty_perform_flush(struct tty_struct *tty, unsigned long arg) { struct tty_ldisc *ld; int retval = tty_check_change(tty); if (retval) return retval; ld = tty_ldisc_ref_wait(tty); retval = __tty_perform_flush(tty, arg); if (ld) tty_ldisc_deref(ld); return retval; } EXPORT_SYMBOL_GPL(tty_perform_flush); int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { int retval; switch (cmd) { case TCXONC: retval = tty_check_change(tty); if (retval) return retval; switch (arg) { case TCOOFF: spin_lock_irq(&tty->flow.lock); if (!tty->flow.tco_stopped) { tty->flow.tco_stopped = true; __stop_tty(tty); } spin_unlock_irq(&tty->flow.lock); break; case TCOON: spin_lock_irq(&tty->flow.lock); if (tty->flow.tco_stopped) { tty->flow.tco_stopped = false; __start_tty(tty); } spin_unlock_irq(&tty->flow.lock); break; case TCIOFF: if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); break; case TCION: if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); break; default: return -EINVAL; } return retval; case TCFLSH: retval = tty_check_change(tty); if (retval) return retval; return __tty_perform_flush(tty, arg); default: /* Try the mode commands */ return tty_mode_ioctl(tty, cmd, arg); } } EXPORT_SYMBOL(n_tty_ioctl_helper); |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* * include/net/tipc.h: Include file for TIPC message header routines * * Copyright (c) 2017 Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _TIPC_HDR_H #define _TIPC_HDR_H #include <linux/random.h> #define KEEPALIVE_MSG_MASK 0x0e080000 /* LINK_PROTOCOL + MSG_IS_KEEPALIVE */ struct tipc_basic_hdr { __be32 w[4]; }; static inline __be32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) { u32 w0 = ntohl(hdr->w[0]); bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; __be32 key; /* Return source node identity as key */ if (likely(!keepalive_msg)) return hdr->w[3]; /* Spread PROBE/PROBE_REPLY messages across the cores */ get_random_bytes(&key, sizeof(key)); return key; } #endif |
8514 960 960 584 29 560 584 560 8 21 3067 3509 3510 3468 18 43 1933 1725 3138 3128 3131 1998 1435 3133 1741 1739 1325 1739 24 81 111 84 84 59 60 6 6 25 23 1399 1400 1401 1399 170 3277 995 86 329 2346 1486 945 1142 3621 3448 177 3568 90 3620 3044 1041 2963 659 2341 3511 3508 3510 3623 13 3620 1 224 3263 584 8 3404 168 822 1683 14 14 10479 10486 10484 10539 4998 10478 9132 2 3445 222 2 60 36 24 60 1314 1316 1315 2418 2418 2417 2418 492 101 102 2 28 73 50 30 265 260 3 72 4 4 4 3 81 81 80 36 80 1886 1883 83 14 14 14 81 6 6 6 6 6 19 19 150 150 150 647 647 646 645 48 5 68 22 644 648 22 16 21 647 647 1 1 2 2 2 1287 1289 1288 1289 645 645 59 35 34 58 644 59 646 259 258 259 259 259 259 259 416 415 438 23 415 10481 1626 9858 202 10483 1228 4418 17 17 6575 5791 964 155 155 10480 10478 10479 4365 8123 392 8114 398 7681 1111 8189 363 7410 1754 8438 190 8184 354 1 15 2757 3774 335 5788 3650 10015 5 9650 9654 9649 9616 9648 9651 7536 8468 2045 2044 1602 1602 43 4 9 15 25 15 15 14 15 44 960 232 232 233 10 3589 3587 233 3387 585 8 4385 1089 3933 8393 8393 8403 8398 791 6518 8401 102 102 102 395 125 283 4293 4295 4295 4294 2 3492 3493 3492 5 3590 3589 3589 8 2 3582 2 5 5 5 5 1 4 3583 3579 3584 3578 3576 3578 3575 134 4258 1147 3484 3670 1055 1055 1054 1145 3487 3668 1054 1343 1233 183 5 4 354 21 31 353 373 391 392 392 116 95 323 26 386 8 390 392 8 386 9 386 374 6 368 31 31 31 31 6 384 392 386 8 323 116 6 386 368 31 31 31 31 31 458 508 3 1 1 3181 28 2637 721 477 9 3 3 6 705 412 355 355 355 2 72 72 36 72 317 317 317 316 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 | // SPDX-License-Identifier: GPL-2.0-only /* * fs/dcache.c * * Complete reimplementation * (C) 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ /* * Notes on the allocation strategy: * * The dcache is a master of the icache - whenever a dcache entry * exists, the inode will always exist. "iput()" is done either when * the dcache entry is deleted or garbage collected. */ #include <linux/ratelimit.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/fscrypt.h> #include <linux/fsnotify.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/cache.h> #include <linux/export.h> #include <linux/security.h> #include <linux/seqlock.h> #include <linux/memblock.h> #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> #include <linux/list_lru.h> #include "internal.h" #include "mount.h" /* * Usage: * dcache->d_inode->i_lock protects: * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_roots bl list spinlock protects: * - the s_roots list (see __d_drop) * dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: * - d_flags * - d_name * - d_lru * - d_count * - d_unhashed() * - d_parent and d_chilren * - childrens' d_sib and d_parent * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock * dentry->d_lock * dentry->d_sb->s_dentry_lru_lock * dcache_hash_bucket lock * s_roots lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock * ... * dentry->d_parent->d_lock * dentry->d_lock * * If no ancestor relationship: * arbitrary, since it's serialized on rename_lock */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __ro_after_init; const struct qstr empty_name = QSTR_INIT("", 0); EXPORT_SYMBOL(empty_name); const struct qstr slash_name = QSTR_INIT("/", 1); EXPORT_SYMBOL(slash_name); const struct qstr dotdot_name = QSTR_INIT("..", 2); EXPORT_SYMBOL(dotdot_name); /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try * to make this good - I've just made it work. * * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ static unsigned int d_hash_shift __ro_after_init; static struct hlist_bl_head *dentry_hashtable __ro_after_init; static inline struct hlist_bl_head *d_hash(unsigned int hash) { return dentry_hashtable + (hash >> d_hash_shift); } #define IN_LOOKUP_SHIFT 10 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); } struct dentry_stat_t { long nr_dentry; long nr_unused; long age_limit; /* age in seconds */ long want_pages; /* pages requested by system */ long nr_negative; /* # of unused negative dentries */ long dummy; /* Reserved for future use */ }; static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* Statistics gathering. */ static struct dentry_stat_t dentry_stat = { .age_limit = 45, }; /* * Here we resort to our own counters instead of using generic per-cpu counters * for consistency with what the vfs inode code does. We are expected to harvest * better code and performance by having our own specialized counters. * * Please note that the loop is done over all possible CPUs, not over all online * CPUs. The reason for this is that we don't want to play games with CPUs going * on and off. If one of them goes off, we will just keep their counters. * * glommer: See cffbc8a for details, and if you ever intend to change this, * please update all vfs counters to match. */ static long get_nr_dentry(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry, i); return sum < 0 ? 0 : sum; } static long get_nr_dentry_unused(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry_unused, i); return sum < 0 ? 0 : sum; } static long get_nr_dentry_negative(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry_negative, i); return sum < 0 ? 0 : sum; } static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dcache_sysctls[] = { { .procname = "dentry-state", .data = &dentry_stat, .maxlen = 6*sizeof(long), .mode = 0444, .proc_handler = proc_nr_dentry, }, }; static int __init init_fs_dcache_sysctls(void) { register_sysctl_init("fs", fs_dcache_sysctls); return 0; } fs_initcall(init_fs_dcache_sysctls); #endif /* * Compare 2 name strings, return 0 if they match, otherwise non-zero. * The strings are both count bytes long, and count is non-zero. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> /* * NOTE! 'cs' and 'scount' come from a dentry, so it has a * aligned allocation for this particular component. We don't * strictly need the load_unaligned_zeropad() safety, but it * doesn't hurt either. * * In contrast, 'ct' and 'tcount' can be from a pathname, and do * need the careful unaligned handling. */ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { unsigned long a,b,mask; for (;;) { a = read_word_at_a_time(cs); b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; if (unlikely(a != b)) return 1; cs += sizeof(unsigned long); ct += sizeof(unsigned long); tcount -= sizeof(unsigned long); if (!tcount) return 0; } mask = bytemask_from_count(tcount); return unlikely(!!((a ^ b) & mask)); } #else static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { do { if (*cs != *ct) return 1; cs++; ct++; tcount--; } while (tcount); return 0; } #endif static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { /* * Be careful about RCU walk racing with rename: * use 'READ_ONCE' to fetch the name pointer. * * NOTE! Even if a rename will mean that the length * was not loaded atomically, we don't care. The * RCU walk will check the sequence count eventually, * and catch it. And we won't overrun the buffer, * because we're reading the name pointer atomically, * and a dentry name is guaranteed to be properly * terminated with a NUL byte. * * End result: even if 'len' is wrong, we'll exit * early because the data cannot match (there can * be no NUL in the ct/tcount data) */ const unsigned char *cs = READ_ONCE(dentry->d_name.name); return dentry_string_cmp(cs, ct, tcount); } struct external_name { union { atomic_t count; struct rcu_head head; } u; unsigned char name[]; }; static inline struct external_name *external_name(struct dentry *dentry) { return container_of(dentry->d_name.name, struct external_name, name[0]); } static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); kmem_cache_free(dentry_cache, dentry); } static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } static inline int dname_external(const struct dentry *dentry) { return dentry->d_name.name != dentry->d_iname; } void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) { spin_lock(&dentry->d_lock); name->name = dentry->d_name; if (unlikely(dname_external(dentry))) { atomic_inc(&external_name(dentry)->u.count); } else { memcpy(name->inline_name, dentry->d_iname, dentry->d_name.len + 1); name->name.name = name->inline_name; } spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(take_dentry_name_snapshot); void release_dentry_name_snapshot(struct name_snapshot *name) { if (unlikely(name->name.name != name->inline_name)) { struct external_name *p; p = container_of(name->name.name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) kfree_rcu(p, u.head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) { unsigned flags; dentry->d_inode = inode; flags = READ_ONCE(dentry->d_flags); flags &= ~DCACHE_ENTRY_TYPE; flags |= type_flags; smp_store_release(&dentry->d_flags, flags); } static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; if (dentry->d_flags & DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } static void dentry_free(struct dentry *dentry) { WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->u.count))) { call_rcu(&dentry->d_u.d_rcu, __d_free_external); return; } } /* if dentry was never visible to RCU, immediate free is OK */ if (dentry->d_flags & DCACHE_NORCU) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); } /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else iput(inode); } /* * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry * is in use - which includes both the "real" per-superblock * LRU list _and_ the DCACHE_SHRINK_LIST use. * * The DCACHE_SHRINK_LIST bit is set whenever the dentry is * on the shrink list (ie not on the superblock LRU list). * * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * * The per-cpu "nr_dentry_negative" counters are only updated * when deleted from or added to the per-superblock LRU list, not * from/to the shrink list. That is to avoid an unneeded dec/inc * pair when moving from LRU to shrink list in select_collect(). * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) static void d_lru_add(struct dentry *dentry) { D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_inc(nr_dentry_negative); WARN_ON_ONCE(!list_lru_add_obj( &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_lru_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); WARN_ON_ONCE(!list_lru_del_obj( &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_shrink_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); list_del_init(&dentry->d_lru); dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); this_cpu_dec(nr_dentry_unused); } static void d_shrink_add(struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, 0); list_add(&dentry->d_lru, list); dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); } /* * These can only be called under the global LRU lock, ie during the * callback for freeing the LRU list. "isolate" removes it from the * LRU lists entirely, while shrink_move moves it to the indicated * private list. */ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); list_lru_isolate(lru, &dentry->d_lru); } static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); list_lru_isolate_move(lru, &dentry->d_lru, list); } static void ___d_drop(struct dentry *dentry) { struct hlist_bl_head *b; /* * Hashed dentries are normally on the dentry hashtable, * with the exception of those newly allocated by * d_obtain_root, which are always IS_ROOT: */ if (unlikely(IS_ROOT(dentry))) b = &dentry->d_sb->s_roots; else b = d_hash(dentry->d_name.hash); hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); hlist_bl_unlock(b); } void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { ___d_drop(dentry); dentry->d_hash.pprev = NULL; write_seqcount_invalidate(&dentry->d_seq); } } EXPORT_SYMBOL(__d_drop); /** * d_drop - drop a dentry * @dentry: dentry to drop * * d_drop() unhashes the entry from the parent dentry hashes, so that it won't * be found through a VFS lookup any more. Note that this is different from * deleting the dentry - d_delete will try to mark the dentry negative if * possible, giving a successful _negative_ lookup, while d_drop will * just make the cache lookup fail. * * d_drop() is used mainly for stuff that wants to invalidate a dentry for some * reason (NFS timeouts or autofs deletes). * * __d_drop requires dentry->d_lock * * ___d_drop doesn't mark dentry as "unhashed" * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). */ void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(d_drop); static inline void dentry_unlist(struct dentry *dentry) { struct dentry *next; /* * Inform d_walk() and shrink_dentry_list() that we are no longer * attached to the dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; if (unlikely(hlist_unhashed(&dentry->d_sib))) return; __hlist_del(&dentry->d_sib); /* * Cursors can move around the list of children. While we'd been * a normal list member, it didn't matter - ->d_sib.next would've * been updated. However, from now on it won't be and for the * things like d_walk() it might end up with a nasty surprise. * Normally d_walk() doesn't care about cursors moving around - * ->d_lock on parent prevents that and since a cursor has no children * of its own, we get through it without ever unlocking the parent. * There is one exception, though - if we ascend from a child that * gets killed as soon as we unlock it, the next sibling is found * using the value left in its ->d_sib.next. And if _that_ * pointed to a cursor, and cursor got moved (e.g. by lseek()) * before d_walk() regains parent->d_lock, we'll end up skipping * everything the cursor had been moved past. * * Solution: make sure that the pointer left behind in ->d_sib.next * points to something that won't be moving around. I.e. skip the * cursors. */ while (dentry->d_sib.next) { next = hlist_entry(dentry->d_sib.next, struct dentry, d_sib); if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) break; dentry->d_sib.next = next->d_sib.next; } } static struct dentry *__dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; bool can_free = true; /* * The dentry is now unrecoverably dead to the world. */ lockref_mark_dead(&dentry->d_lockref); /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. */ if (dentry->d_flags & DCACHE_OP_PRUNE) dentry->d_op->d_prune(dentry); if (dentry->d_flags & DCACHE_LRU_LIST) { if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) d_lru_del(dentry); } /* if it was on the hash then remove it */ __d_drop(dentry); if (dentry->d_inode) dentry_unlink_inode(dentry); else spin_unlock(&dentry->d_lock); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); cond_resched(); /* now that it's negative, ->d_parent is stable */ if (!IS_ROOT(dentry)) { parent = dentry->d_parent; spin_lock(&parent->d_lock); } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry_unlist(dentry); if (dentry->d_flags & DCACHE_SHRINK_LIST) can_free = false; spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); if (parent && --parent->d_lockref.count) { spin_unlock(&parent->d_lock); return NULL; } return parent; } /* * Lock a dentry for feeding it to __dentry_kill(). * Called under rcu_read_lock() and dentry->d_lock; the former * guarantees that nothing we access will be freed under us. * Note that dentry is *not* protected from concurrent dentry_kill(), * d_delete(), etc. * * Return false if dentry is busy. Otherwise, return true and have * that dentry's inode locked. */ static bool lock_for_kill(struct dentry *dentry) { struct inode *inode = dentry->d_inode; if (unlikely(dentry->d_lockref.count)) return false; if (!inode || likely(spin_trylock(&inode->i_lock))) return true; do { spin_unlock(&dentry->d_lock); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); if (likely(inode == dentry->d_inode)) break; spin_unlock(&inode->i_lock); inode = dentry->d_inode; } while (inode); if (likely(!dentry->d_lockref.count)) return true; if (inode) spin_unlock(&inode->i_lock); return false; } /* * Decide if dentry is worth retaining. Usually this is called with dentry * locked; if not locked, we are more limited and might not be able to tell * without a lock. False in this case means "punt to locked path and recheck". * * In case we aren't locked, these predicates are not "stable". However, it is * sufficient that at some point after we dropped the reference the dentry was * hashed and the flags had the proper value. Other dentry users may have * re-gotten a reference to the dentry and change that, but our work is done - * we can leave the dentry around with a zero refcount. */ static inline bool retain_dentry(struct dentry *dentry, bool locked) { unsigned int d_flags; smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); // Unreachable? Nobody would be able to look it up, no point retaining if (unlikely(d_unhashed(dentry))) return false; // Same if it's disconnected if (unlikely(d_flags & DCACHE_DISCONNECTED)) return false; // ->d_delete() might tell us not to bother, but that requires // ->d_lock; can't decide without it if (unlikely(d_flags & DCACHE_OP_DELETE)) { if (!locked || dentry->d_op->d_delete(dentry)) return false; } // Explicitly told not to bother if (unlikely(d_flags & DCACHE_DONTCACHE)) return false; // At this point it looks like we ought to keep it. We also might // need to do something - put it on LRU if it wasn't there already // and mark it referenced if it was on LRU, but not marked yet. // Unfortunately, both actions require ->d_lock, so in lockless // case we'd have to punt rather than doing those. if (unlikely(!(d_flags & DCACHE_LRU_LIST))) { if (!locked) return false; d_lru_add(dentry); } else if (unlikely(!(d_flags & DCACHE_REFERENCED))) { if (!locked) return false; dentry->d_flags |= DCACHE_REFERENCED; } return true; } void d_mark_dontcache(struct inode *inode) { struct dentry *de; spin_lock(&inode->i_lock); hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) { spin_lock(&de->d_lock); de->d_flags |= DCACHE_DONTCACHE; spin_unlock(&de->d_lock); } inode->i_state |= I_DONTCACHE; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_mark_dontcache); /* * Try to do a lockless dput(), and return whether that was successful. * * If unsuccessful, we return false, having already taken the dentry lock. * In that case refcount is guaranteed to be zero and we have already * decided that it's not worth keeping around. * * The caller needs to hold the RCU read lock, so that the dentry is * guaranteed to stay around even if the refcount goes down to zero! */ static inline bool fast_dput(struct dentry *dentry) { int ret; /* * try to decrement the lockref optimistically. */ ret = lockref_put_return(&dentry->d_lockref); /* * If the lockref_put_return() failed due to the lock being held * by somebody else, the fast path has failed. We will need to * get the lock, and then check the count again. */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; } dentry->d_lockref.count--; goto locked; } /* * If we weren't the last ref, we're done. */ if (ret) return true; /* * Can we decide that decrement of refcount is all we needed without * taking the lock? There's a very common case when it's all we need - * dentry looks like it ought to be retained and there's nothing else * to do. */ if (retain_dentry(dentry, false)) return true; /* * Either not worth retaining or we can't tell without the lock. * Get the lock, then. We've already decremented the refcount to 0, * but we'll need to re-check the situation after getting the lock. */ spin_lock(&dentry->d_lock); /* * Did somebody else grab a reference to it in the meantime, and * we're no longer the last user after all? Alternatively, somebody * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ locked: if (dentry->d_lockref.count || retain_dentry(dentry, true)) { spin_unlock(&dentry->d_lock); return true; } return false; } /* * This is dput * * This is complicated by the fact that we do not want to put * dentries that are no longer on any hash chain on the unused * list: we'd much rather just get rid of them immediately. * * However, that implies that we have to traverse the dentry * tree upwards to the parents which might _also_ now be * scheduled for deletion (it may have been only waiting for * its last child to go away). * * This tail recursion is done by hand as we don't want to depend * on the compiler to always get this right (gcc generally doesn't). * Real recursion would eat up our stack space. */ /* * dput - release a dentry * @dentry: dentry to release * * Release a dentry. This will drop the usage count and if appropriate * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. */ void dput(struct dentry *dentry) { if (!dentry) return; might_sleep(); rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); return; } while (lock_for_kill(dentry)) { rcu_read_unlock(); dentry = __dentry_kill(dentry); if (!dentry) return; if (retain_dentry(dentry, true)) { spin_unlock(&dentry->d_lock); return; } rcu_read_lock(); } rcu_read_unlock(); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(dput); static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); d_shrink_add(dentry, list); } } void dput_to_list(struct dentry *dentry, struct list_head *list) { rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); return; } rcu_read_unlock(); to_shrink_list(dentry, list); spin_unlock(&dentry->d_lock); } struct dentry *dget_parent(struct dentry *dentry) { int gotref; struct dentry *ret; unsigned seq; /* * Do optimistic parent lookup without any * locking. */ rcu_read_lock(); seq = raw_seqcount_begin(&dentry->d_seq); ret = READ_ONCE(dentry->d_parent); gotref = lockref_get_not_zero(&ret->d_lockref); rcu_read_unlock(); if (likely(gotref)) { if (!read_seqcount_retry(&dentry->d_seq, seq)) return ret; dput(ret); } repeat: /* * Don't need rcu_dereference because we re-check it was correct under * the lock. */ rcu_read_lock(); ret = dentry->d_parent; spin_lock(&ret->d_lock); if (unlikely(ret != dentry->d_parent)) { spin_unlock(&ret->d_lock); rcu_read_unlock(); goto repeat; } rcu_read_unlock(); BUG_ON(!ret->d_lockref.count); ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } EXPORT_SYMBOL(dget_parent); static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; if (hlist_empty(&inode->i_dentry)) return NULL; alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); lockref_get(&alias->d_lockref); return alias; } /** * d_find_any_alias - find any alias for a given inode * @inode: inode to find an alias for * * If any aliases exist for the given inode, take and return a * reference for one of them. If no aliases exist, return %NULL. */ struct dentry *d_find_any_alias(struct inode *inode) { struct dentry *de; spin_lock(&inode->i_lock); de = __d_find_any_alias(inode); spin_unlock(&inode->i_lock); return de; } EXPORT_SYMBOL(d_find_any_alias); static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias; if (S_ISDIR(inode->i_mode)) return __d_find_any_alias(inode); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } spin_unlock(&alias->d_lock); } return NULL; } /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. * Notice that if inode is a directory there can be only one alias and * it can be unhashed only if it has no children, or if it is the root * of a filesystem, or if the directory was renamed and d_revalidate * was the first vfs operation to notice. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one. */ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; } EXPORT_SYMBOL(d_find_alias); /* * Caller MUST be holding rcu_read_lock() and be guaranteed * that inode won't get freed until rcu_read_unlock(). */ struct dentry *d_find_alias_rcu(struct inode *inode) { struct hlist_head *l = &inode->i_dentry; struct dentry *de = NULL; spin_lock(&inode->i_lock); // ->i_dentry and ->i_rcu are colocated, but the latter won't be // used without having I_FREEING set, which means no aliases left if (likely(!(inode->i_state & I_FREEING) && !hlist_empty(l))) { if (S_ISDIR(inode->i_mode)) { de = hlist_entry(l->first, struct dentry, d_u.d_alias); } else { hlist_for_each_entry(de, l, d_u.d_alias) if (!d_unhashed(de)) break; } } spin_unlock(&inode->i_lock); return de; } /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. */ void d_prune_aliases(struct inode *inode) { LIST_HEAD(dispose); struct dentry *dentry; spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) to_shrink_list(dentry, &dispose); spin_unlock(&dentry->d_lock); } spin_unlock(&inode->i_lock); shrink_dentry_list(&dispose); } EXPORT_SYMBOL(d_prune_aliases); static inline void shrink_kill(struct dentry *victim) { do { rcu_read_unlock(); victim = __dentry_kill(victim); rcu_read_lock(); } while (victim && lock_for_kill(victim)); rcu_read_unlock(); if (victim) spin_unlock(&victim->d_lock); } void shrink_dentry_list(struct list_head *list) { while (!list_empty(list)) { struct dentry *dentry; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); if (!lock_for_kill(dentry)) { bool can_free; rcu_read_unlock(); d_shrink_del(dentry); can_free = dentry->d_flags & DCACHE_DENTRY_KILLED; spin_unlock(&dentry->d_lock); if (can_free) dentry_free(dentry); continue; } d_shrink_del(dentry); shrink_kill(dentry); } } static enum lru_status dentry_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; /* * Referenced dentries are still in use. If they have active * counts, just remove them from the LRU. Otherwise give them * another pass through the LRU. */ if (dentry->d_lockref.count) { d_lru_isolate(lru, dentry); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } if (dentry->d_flags & DCACHE_REFERENCED) { dentry->d_flags &= ~DCACHE_REFERENCED; spin_unlock(&dentry->d_lock); /* * The list move itself will be made by the common LRU code. At * this point, we've dropped the dentry->d_lock but keep the * lru lock. This is safe to do, since every list movement is * protected by the lru lock even if both locks are held. * * This is guaranteed by the fact that all LRU management * functions are intermediated by the LRU API calls like * list_lru_add_obj and list_lru_del_obj. List movement in this file * only ever occur through this functions or through callbacks * like this one, that are called from the LRU API. * * The only exceptions to this are functions like * shrink_dentry_list, and code that first checks for the * DCACHE_SHRINK_LIST flag. Those are guaranteed to be * operating only with stack provided lists after they are * properly isolated from the main list. It is thus, always a * local access. */ return LRU_ROTATE; } d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * prune_dcache_sb - shrink the dcache * @sb: superblock * @sc: shrink control, passed to list_lru_shrink_walk() * * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This * is done when we need more memory and called from the superblock shrinker * function. * * This function may fail to free any resources if all the dentries are in * use. */ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) { LIST_HEAD(dispose); long freed; freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc, dentry_lru_isolate, &dispose); shrink_dentry_list(&dispose); return freed; } static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock * * Shrink the dcache for the specified super block. This is used to free * the dcache before unmounting a file system. */ void shrink_dcache_sb(struct super_block *sb) { do { LIST_HEAD(dispose); list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); shrink_dentry_list(&dispose); } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk * @D_WALK_QUIT: quit walk * @D_WALK_NORETRY: quit when retry is needed * @D_WALK_SKIP: skip this dentry and its children */ enum d_walk_ret { D_WALK_CONTINUE, D_WALK_QUIT, D_WALK_NORETRY, D_WALK_SKIP, }; /** * d_walk - walk the dentry tree * @parent: start of walk * @data: data passed to @enter() and @finish() * @enter: callback when first entering the dentry * * The @enter() callbacks are called with d_lock held. */ static void d_walk(struct dentry *parent, void *data, enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent, *dentry; unsigned seq = 0; enum d_walk_ret ret; bool retry = true; again: read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; spin_lock(&this_parent->d_lock); ret = enter(data, this_parent); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: case D_WALK_SKIP: goto out_unlock; case D_WALK_NORETRY: retry = false; break; } repeat: dentry = d_first_child(this_parent); resume: hlist_for_each_entry_from(dentry, d_sib) { if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) continue; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: spin_unlock(&dentry->d_lock); goto out_unlock; case D_WALK_NORETRY: retry = false; break; case D_WALK_SKIP: spin_unlock(&dentry->d_lock); continue; } if (!hlist_empty(&dentry->d_children)) { spin_unlock(&this_parent->d_lock); spin_release(&dentry->d_lock.dep_map, _RET_IP_); this_parent = dentry; spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ rcu_read_lock(); ascend: if (this_parent != parent) { dentry = this_parent; this_parent = dentry->d_parent; spin_unlock(&dentry->d_lock); spin_lock(&this_parent->d_lock); /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; /* go into the first sibling still alive */ hlist_for_each_entry_continue(dentry, d_sib) { if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { rcu_read_unlock(); goto resume; } } goto ascend; } if (need_seqretry(&rename_lock, seq)) goto rename_retry; rcu_read_unlock(); out_unlock: spin_unlock(&this_parent->d_lock); done_seqretry(&rename_lock, seq); return; rename_retry: spin_unlock(&this_parent->d_lock); rcu_read_unlock(); BUG_ON(seq & 1); if (!retry) return; seq = 1; goto again; } struct check_mount { struct vfsmount *mnt; unsigned int mounted; }; static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry) { struct check_mount *info = data; struct path path = { .mnt = info->mnt, .dentry = dentry }; if (likely(!d_mountpoint(dentry))) return D_WALK_CONTINUE; if (__path_is_mountpoint(&path)) { info->mounted = 1; return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * path_has_submounts - check for mounts over a dentry in the * current namespace. * @parent: path to check. * * Return true if the parent or its subdirectories contain * a mount point in the current namespace. */ int path_has_submounts(const struct path *parent) { struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; read_seqlock_excl(&mount_lock); d_walk(parent->dentry, &data, path_check_mount); read_sequnlock_excl(&mount_lock); return data.mounted; } EXPORT_SYMBOL(path_has_submounts); /* * Called by mount code to set a mountpoint and check if the mountpoint is * reachable (e.g. NFS can unhash a directory dentry and then the complete * subtree can become unreachable). * * Only one of d_invalidate() and d_set_mounted() must succeed. For * this reason take rename_lock and d_lock on dentry and ancestors. */ int d_set_mounted(struct dentry *dentry) { struct dentry *p; int ret = -ENOENT; write_seqlock(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); if (unlikely(d_unhashed(p))) { spin_unlock(&p->d_lock); goto out; } spin_unlock(&p->d_lock); } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { ret = -EBUSY; if (!d_mountpoint(dentry)) { dentry->d_flags |= DCACHE_MOUNTED; ret = 0; } } spin_unlock(&dentry->d_lock); out: write_sequnlock(&rename_lock); return ret; } /* * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_children list is non-empty and continue * searching. * * It returns zero iff there are no unused children, * otherwise it returns the number of children moved to * the end of the unused list. This may not be the total * number of unused children, because select_parent can * drop the lock and return early due to latency * constraints. */ struct select_data { struct dentry *start; union { long found; struct dentry *victim; }; struct list_head dispose; }; static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; enum d_walk_ret ret = D_WALK_CONTINUE; if (data->start == dentry) goto out; if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; } else if (!dentry->d_lockref.count) { to_shrink_list(dentry, &data->dispose); data->found++; } else if (dentry->d_lockref.count < 0) { data->found++; } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ if (!list_empty(&data->dispose)) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) { struct select_data *data = _data; enum d_walk_ret ret = D_WALK_CONTINUE; if (data->start == dentry) goto out; if (!dentry->d_lockref.count) { if (dentry->d_flags & DCACHE_SHRINK_LIST) { rcu_read_lock(); data->victim = dentry; return D_WALK_QUIT; } to_shrink_list(dentry, &data->dispose); } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ if (!list_empty(&data->dispose)) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } /** * shrink_dcache_parent - prune dcache * @parent: parent of entries to prune * * Prune the dcache to remove unused children of the parent dentry. */ void shrink_dcache_parent(struct dentry *parent) { for (;;) { struct select_data data = {.start = parent}; INIT_LIST_HEAD(&data.dispose); d_walk(parent, &data, select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); continue; } cond_resched(); if (!data.found) break; data.victim = NULL; d_walk(parent, &data, select_collect2); if (data.victim) { spin_lock(&data.victim->d_lock); if (!lock_for_kill(data.victim)) { spin_unlock(&data.victim->d_lock); rcu_read_unlock(); } else { shrink_kill(data.victim); } } if (!list_empty(&data.dispose)) shrink_dentry_list(&data.dispose); } } EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) { /* it has busy descendents; complain about those instead */ if (!hlist_empty(&dentry->d_children)) return D_WALK_CONTINUE; /* root with refcount 1 is fine */ if (dentry == _data && dentry->d_lockref.count == 1) return D_WALK_CONTINUE; WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} " " still in use (%d) [unmount of %s %s]\n", dentry, dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry, dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); return D_WALK_CONTINUE; } static void do_one_tree(struct dentry *dentry) { shrink_dcache_parent(dentry); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); } /* * destroy the dentries attached to a superblock on unmounting */ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); dentry = sb->s_root; sb->s_root = NULL; do_one_tree(dentry); while (!hlist_bl_empty(&sb->s_roots)) { dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash)); do_one_tree(dentry); } } static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) { struct dentry **victim = _data; if (d_mountpoint(dentry)) { *victim = dget_dlock(dentry); return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) */ void d_invalidate(struct dentry *dentry) { bool had_submounts = false; spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); return; } __d_drop(dentry); spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) return; shrink_dcache_parent(dentry); for (;;) { struct dentry *victim = NULL; d_walk(dentry, &victim, find_submount); if (!victim) { if (had_submounts) shrink_dcache_parent(dentry); return; } had_submounts = true; detach_mounts(victim); dput(victim); } } EXPORT_SYMBOL(d_invalidate); /** * __d_alloc - allocate a dcache entry * @sb: filesystem it will belong to * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. The name passed in is * copied and the copy passed in may be reused after this call. */ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct dentry *dentry; char *dname; int err; dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru, GFP_KERNEL); if (!dentry) return NULL; /* * We guarantee that the inline name is always NUL-terminated. * This way the memcpy() done by the name switching in rename * will still always have a NUL at the end, even if we might * be overwriting an internal NUL character */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (unlikely(!name)) { name = &slash_name; dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); struct external_name *p = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } atomic_set(&p->u.count, 1); dname = p->name; } else { dname = dentry->d_iname; } dentry->d_name.len = name->len; dentry->d_name.hash = name->hash; memcpy(dname, name->name, name->len); dname[name->len] = 0; /* Make sure we always see the terminating NUL character */ smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_HLIST_HEAD(&dentry->d_children); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_HLIST_NODE(&dentry->d_sib); d_set_d_op(dentry, dentry->d_sb->s_d_op); if (dentry->d_op && dentry->d_op->d_init) { err = dentry->d_op->d_init(dentry); if (err) { if (dname_external(dentry)) kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); return NULL; } } this_cpu_inc(nr_dentry); return dentry; } /** * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. The name passed in is * copied and the copy passed in may be reused after this call. */ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) { struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject * to concurrency here */ dentry->d_parent = dget_dlock(parent); hlist_add_head(&dentry->d_sib, &parent->d_children); spin_unlock(&parent->d_lock); return dentry; } EXPORT_SYMBOL(d_alloc); struct dentry *d_alloc_anon(struct super_block *sb) { return __d_alloc(sb, NULL); } EXPORT_SYMBOL(d_alloc_anon); struct dentry *d_alloc_cursor(struct dentry * parent) { struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); } return dentry; } /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock * @name: qstr of the name * * For a filesystem that just pins its dentries in memory and never * performs lookups at all, return an unhashed IS_ROOT dentry. * This is used for pipes, sockets et.al. - the stuff that should * never be anyone's children or parents. Unlike all other * dentries, these will not have RCU delay between dropping the * last reference and freeing them. * * The only user is alloc_file_pseudo() and that's what should * be considered a public interface. Don't use directly. */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { static const struct dentry_operations anon_ops = { .d_dname = simple_dname }; struct dentry *dentry = __d_alloc(sb, name); if (likely(dentry)) { dentry->d_flags |= DCACHE_NORCU; if (!sb->s_d_op) d_set_d_op(dentry, &anon_ops); } return dentry; } struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; q.name = name; q.hash_len = hashlen_string(parent, name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) { WARN_ON_ONCE(dentry->d_op); WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; if (op->d_hash) dentry->d_flags |= DCACHE_OP_HASH; if (op->d_compare) dentry->d_flags |= DCACHE_OP_COMPARE; if (op->d_revalidate) dentry->d_flags |= DCACHE_OP_REVALIDATE; if (op->d_weak_revalidate) dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; if (op->d_delete) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_real) dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); static unsigned d_flags_for_inode(struct inode *inode) { unsigned add_flags = DCACHE_REGULAR_TYPE; if (!inode) return DCACHE_MISS_TYPE; if (S_ISDIR(inode->i_mode)) { add_flags = DCACHE_DIRECTORY_TYPE; if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { if (unlikely(!inode->i_op->lookup)) add_flags = DCACHE_AUTODIR_TYPE; else inode->i_opflags |= IOP_LOOKUP; } goto type_determined; } if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { if (unlikely(inode->i_op->get_link)) { add_flags = DCACHE_SYMLINK_TYPE; goto type_determined; } inode->i_opflags |= IOP_NOFOLLOW; } if (unlikely(!S_ISREG(inode->i_mode))) add_flags = DCACHE_SPECIAL_TYPE; type_determined: if (unlikely(IS_AUTOMOUNT(inode))) add_flags |= DCACHE_NEED_AUTOMOUNT; return add_flags; } static void __d_instantiate(struct dentry *dentry, struct inode *inode) { unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); /* * Decrement negative dentry count if it was in the LRU list. */ if (dentry->d_flags & DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); spin_unlock(&dentry->d_lock); } /** * d_instantiate - fill in inode information for a dentry * @entry: dentry to complete * @inode: inode to attach to this dentry * * Fill in inode information in the entry. * * This turns negative dentries into productive full members * of society. * * NOTE! This assumes that the inode count has been incremented * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. */ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(d_instantiate); /* * This should be equivalent to d_instantiate() + unlock_new_inode(), * with lockdep-related part of unlock_new_inode() done before * anything else. Use that instead of open-coding d_instantiate()/ * unlock_new_inode() combinations. */ void d_instantiate_new(struct dentry *entry, struct inode *inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); BUG_ON(!inode); lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else iput(root_inode); } return res; } EXPORT_SYMBOL(d_make_root); static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) { struct super_block *sb; struct dentry *new, *res; if (!inode) return ERR_PTR(-ESTALE); if (IS_ERR(inode)) return ERR_CAST(inode); sb = inode->i_sb; res = d_find_any_alias(inode); /* existing alias? */ if (res) goto out; new = d_alloc_anon(sb); if (!new) { res = ERR_PTR(-ENOMEM); goto out; } security_d_instantiate(new, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); /* recheck under lock */ if (likely(!res)) { /* still no alias, attach a disconnected dentry */ unsigned add_flags = d_flags_for_inode(inode); if (disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&new->d_lock); __d_set_inode_and_type(new, inode, add_flags); hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); if (!disconnected) { hlist_bl_lock(&sb->s_roots); hlist_bl_add_head(&new->d_hash, &sb->s_roots); hlist_bl_unlock(&sb->s_roots); } spin_unlock(&new->d_lock); spin_unlock(&inode->i_lock); inode = NULL; /* consumed by new->d_inode */ res = new; } else { spin_unlock(&inode->i_lock); dput(new); } out: iput(inode); return res; } /** * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain a dentry for an inode resulting from NFS filehandle conversion or * similar open by handle operations. The returned dentry may be anonymous, * or may have a full name (if the inode was already in the cache). * * When called on a directory inode, we must ensure that the inode only ever * has one dentry. If a dentry is found, that is returned instead of * allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is released. * To make it easier to use in export operations a %NULL or IS_ERR inode may * be passed in and the error will be propagated to the return value, * with a %NULL @inode replaced by ERR_PTR(-ESTALE). */ struct dentry *d_obtain_alias(struct inode *inode) { return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); /** * d_obtain_root - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain an IS_ROOT dentry for the root of a filesystem. * * We must ensure that directory inodes only ever have one dentry. If a * dentry is found, that is returned instead of allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is * released. A %NULL or IS_ERR inode may be passed in and will be the * error will be propagate to the return value, with a %NULL @inode * replaced by ERR_PTR(-ESTALE). */ struct dentry *d_obtain_root(struct inode *inode) { return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); /** * d_add_ci - lookup or allocate new dentry with case-exact name * @inode: the inode case-insensitive lookup has found * @dentry: the negative dentry that was passed to the parent's lookup func * @name: the case-exact name to be associated with the returned dentry * * This is to avoid filling the dcache with case-insensitive names to the * same inode, only the actual correct case is stored in the dcache for * case-insensitive filesystems. * * For a case-insensitive lookup match and if the case-exact dentry * already exists in the dcache, use it and return it. * * If no entry exists with the exact case name, allocate new dentry with * the exact case, and return the spliced entry. */ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { struct dentry *found, *res; /* * First check if a dentry matching the name already exists, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); if (found) { iput(inode); return found; } if (d_in_lookup(dentry)) { found = d_alloc_parallel(dentry->d_parent, name, dentry->d_wait); if (IS_ERR(found) || !d_in_lookup(found)) { iput(inode); return found; } } else { found = d_alloc(dentry->d_parent, name); if (!found) { iput(inode); return ERR_PTR(-ENOMEM); } } res = d_splice_alias(inode, found); if (res) { d_lookup_done(found); dput(found); return res; } return found; } EXPORT_SYMBOL(d_add_ci); /** * d_same_name - compare dentry name with case-exact name * @parent: parent dentry * @dentry: the negative dentry that was passed to the parent's lookup func * @name: the case-exact name to be associated with the returned dentry * * Return: true if names are same, or false */ bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name) { if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) { if (dentry->d_name.len != name->len) return false; return dentry_cmp(dentry, name->name, name->len) == 0; } return parent->d_op->d_compare(dentry, dentry->d_name.len, dentry->d_name.name, name) == 0; } EXPORT_SYMBOL_GPL(d_same_name); /* * This is __d_lookup_rcu() when the parent dentry has * DCACHE_OP_COMPARE, which makes things much nastier. */ static noinline struct dentry *__d_lookup_rcu_op_compare( const struct dentry *parent, const struct qstr *name, unsigned *seqp) { u64 hashlen = name->hash_len; struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { int tlen; const char *tname; unsigned seq; seqretry: seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; tlen = dentry->d_name.len; tname = dentry->d_name.name; /* we want a consistent (name,len) pair */ if (read_seqcount_retry(&dentry->d_seq, seq)) { cpu_relax(); goto seqretry; } if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0) continue; *seqp = seq; return dentry; } return NULL; } /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find * @seqp: returns d_seq value at the point where the dentry was found * Returns: dentry, or NULL * * __d_lookup_rcu is the dcache lookup function for rcu-walk name * resolution (store-free path walking) design described in * Documentation/filesystems/path-lookup.txt. * * This is not to be used outside core vfs. * * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock * held, and rcu_read_lock held. The returned dentry must not be stored into * without taking d_lock and checking d_seq sequence count against @seq * returned here. * * A refcount may be taken on the found dentry with the d_rcu_to_refcount * function. * * Alternatively, __d_lookup_rcu may be called again to look up the child of * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. * * NOTE! The caller *has* to check the resulting dentry against the sequence * number we've returned before using any of the resulting dentry state! */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seqp) { u64 hashlen = name->hash_len; const unsigned char *str = name->name; struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) return __d_lookup_rcu_op_compare(parent, name, seqp); /* * The hash list is protected using RCU. * * Carefully use d_seq when comparing a candidate dentry, to avoid * races with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; /* * The dentry sequence count protects us from concurrent * renames, and thus protects parent and name fields. * * The caller must perform a seqcount check in order * to do anything useful with the returned dentry. * * NOTE! We do a "raw" seqcount_begin here. That means that * we don't wait for the sequence count to stabilize if it * is in the middle of a sequence change. If we do the slow * dentry compare, we will do seqretries until it is stable, * and if we end up with a successful lookup, we actually * want to exit RCU lookup anyway. * * Note that raw_seqcount_begin still *does* smp_rmb(), so * we are still guaranteed NUL-termination of ->d_name.name. */ seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; if (dentry->d_name.hash_len != hashlen) continue; if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) continue; *seqp = seq; return dentry; } return NULL; } /** * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * d_lookup searches the children of the parent dentry for the name in * question. If the dentry is found its reference count is incremented and the * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { struct dentry *dentry; unsigned seq; do { seq = read_seqbegin(&rename_lock); dentry = __d_lookup(parent, name); if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; } EXPORT_SYMBOL(d_lookup); /** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * __d_lookup is like d_lookup, however it may (rarely) return a * false-negative result due to unrelated rename activity. * * __d_lookup is slightly faster by avoiding rename_lock read seqlock, * however it must be used carefully, eg. with a following d_lookup in * the case of failure. * * __d_lookup callers must be commented. */ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { unsigned int hash = name->hash; struct hlist_bl_head *b = d_hash(hash); struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ /* * The hash list is protected using RCU. * * Take d_lock when comparing a candidate dentry, to avoid races * with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ rcu_read_lock(); hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { if (dentry->d_name.hash != hash) continue; spin_lock(&dentry->d_lock); if (dentry->d_parent != parent) goto next; if (d_unhashed(dentry)) goto next; if (!d_same_name(dentry, parent, name)) goto next; dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; next: spin_unlock(&dentry->d_lock); } rcu_read_unlock(); return found; } /** * d_hash_and_lookup - hash the qstr then search for a dentry * @dir: Directory to search in * @name: qstr of name we wish to find * * On lookup failure NULL is returned; on bad name - ERR_PTR(-error) */ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) { /* * Check for a fs-specific hash function. Note that we must * calculate the standard hash first, as the d_op->d_hash() * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(dir, name->name, name->len); if (dir->d_flags & DCACHE_OP_HASH) { int err = dir->d_op->d_hash(dir, name); if (unlikely(err < 0)) return ERR_PTR(err); } return d_lookup(dir, name); } EXPORT_SYMBOL(d_hash_and_lookup); /* * When a file is deleted, we have two options: * - turn this dentry into a negative dentry * - unhash this dentry and free it. * * Usually, we want to just turn this into * a negative dentry, but if anybody else is * currently using the dentry or the inode * we can't do that and we fall back on removing * it from the hash queues and waiting for * it to be deleted later when it has no users */ /** * d_delete - delete a dentry * @dentry: The dentry to delete * * Turn the dentry into a negative dentry if possible, otherwise * remove it from the hash queues so it can be deleted later */ void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); /* * Are we the only user? */ if (dentry->d_lockref.count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); } else { __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash * * Adds a dentry to the hash according to its name. */ void d_rehash(struct dentry * entry) { spin_lock(&entry->d_lock); __d_rehash(entry); spin_unlock(&entry->d_lock); } EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { preempt_disable_nested(); for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) return n; cpu_relax(); } } static inline void end_dir_add(struct inode *dir, unsigned int n, wait_queue_head_t *d_wait) { smp_store_release(&dir->i_dir_seq, n + 2); preempt_enable_nested(); wake_up_all(d_wait); } static void d_wait_lookup(struct dentry *dentry) { if (d_in_lookup(dentry)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(dentry->d_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&dentry->d_lock); schedule(); spin_lock(&dentry->d_lock); } while (d_in_lookup(dentry)); } } struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, wait_queue_head_t *wq) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); struct hlist_bl_node *node; struct dentry *new = d_alloc(parent, name); struct dentry *dentry; unsigned seq, r_seq, d_seq; if (unlikely(!new)) return ERR_PTR(-ENOMEM); retry: rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } if (read_seqcount_retry(&dentry->d_seq, d_seq)) { rcu_read_unlock(); dput(dentry); goto retry; } rcu_read_unlock(); dput(new); return dentry; } if (unlikely(read_seqretry(&rename_lock, r_seq))) { rcu_read_unlock(); goto retry; } if (unlikely(seq & 1)) { rcu_read_unlock(); goto retry; } hlist_bl_lock(b); if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; } /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), * any potential in-lookup matches are going to stay here until * we unlock the chain. All fields are stable in everything * we encounter. */ hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) continue; if (!d_same_name(dentry, parent, name)) continue; hlist_bl_unlock(b); /* now we can try to grab a reference */ if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } rcu_read_unlock(); /* * somebody is likely to be still doing lookup for it; * wait for them to finish */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* * it's not in-lookup anymore; in principle we should repeat * everything from dcache lookup, but it's likely to be what * d_lookup() would've found anyway. If it is, just return it; * otherwise we really have to repeat the whole thing. */ if (unlikely(dentry->d_name.hash != hash)) goto mismatch; if (unlikely(dentry->d_parent != parent)) goto mismatch; if (unlikely(d_unhashed(dentry))) goto mismatch; if (unlikely(!d_same_name(dentry, parent, name))) goto mismatch; /* OK, it *is* a hashed match; return it */ spin_unlock(&dentry->d_lock); dput(new); return dentry; } rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); hlist_bl_unlock(b); return new; mismatch: spin_unlock(&dentry->d_lock); dput(dentry); goto retry; } EXPORT_SYMBOL(d_alloc_parallel); /* * - Unhash the dentry * - Retrieve and clear the waitqueue head in dentry * - Return the waitqueue head */ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) { wait_queue_head_t *d_wait; struct hlist_bl_head *b; lockdep_assert_held(&dentry->d_lock); b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); d_wait = dentry->d_wait; dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_LIST_HEAD(&dentry->d_lru); return d_wait; } void __d_lookup_unhash_wake(struct dentry *dentry) { spin_lock(&dentry->d_lock); wake_up_all(__d_lookup_unhash(dentry)); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(__d_lookup_unhash_wake); /* inode->i_lock held if inode is non-NULL */ static inline void __d_add(struct dentry *dentry, struct inode *inode) { wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; spin_lock(&dentry->d_lock); if (unlikely(d_in_lookup(dentry))) { dir = dentry->d_parent->d_inode; n = start_dir_add(dir); d_wait = __d_lookup_unhash(dentry); } if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); } __d_rehash(dentry); if (dir) end_dir_add(dir, n, d_wait); spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); } /** * d_add - add dentry to hash queues * @entry: dentry to add * @inode: The inode to attach to this dentry * * This adds the entry to the hash queues and initializes @inode. * The entry was actually filled in earlier during d_alloc(). */ void d_add(struct dentry *entry, struct inode *inode) { if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); } __d_add(entry, inode); } EXPORT_SYMBOL(d_add); /** * d_exact_alias - find and hash an exact unhashed alias * @entry: dentry to add * @inode: The inode to go with this dentry * * If an unhashed dentry with the same name/parent and desired * inode already exists, hash and return it. Otherwise, return * NULL. * * Parent directory should be locked. */ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) { struct dentry *alias; unsigned int hash = entry->d_name.hash; spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or * parent changes, because the parent inode i_mutex is held. */ if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; if (!d_same_name(alias, entry->d_parent, &entry->d_name)) continue; spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { spin_unlock(&alias->d_lock); alias = NULL; } else { dget_dlock(alias); __d_rehash(alias); spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); return alias; } spin_unlock(&inode->i_lock); return NULL; } EXPORT_SYMBOL(d_exact_alias); static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { if (unlikely(dname_external(dentry))) { /* * Both external: swap the pointers */ swap(target->d_name.name, dentry->d_name.name); } else { /* * dentry:internal, target:external. Steal target's * storage and make target internal. */ memcpy(target->d_iname, dentry->d_name.name, dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; target->d_name.name = target->d_iname; } } else { if (unlikely(dname_external(dentry))) { /* * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); target->d_name.name = dentry->d_name.name; dentry->d_name.name = dentry->d_iname; } else { /* * Both are internal. */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); } } } swap(dentry->d_name.hash_len, target->d_name.hash_len); } static void copy_name(struct dentry *dentry, struct dentry *target) { struct external_name *old_name = NULL; if (unlikely(dname_external(dentry))) old_name = external_name(dentry); if (unlikely(dname_external(target))) { atomic_inc(&external_name(target)->u.count); dentry->d_name = target->d_name; } else { memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); dentry->d_name.name = dentry->d_iname; dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) kfree_rcu(old_name, u.head); } /* * __d_move - move a dentry * @dentry: entry to move * @target: new dentry * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { struct dentry *old_parent, *p; wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; WARN_ON(!dentry->d_inode); if (WARN_ON(dentry == target)) return; BUG_ON(d_ancestor(target, dentry)); old_parent = dentry->d_parent; p = d_ancestor(old_parent, target); if (IS_ROOT(dentry)) { BUG_ON(p); spin_lock(&target->d_parent->d_lock); } else if (!p) { /* target is not a descendent of dentry->d_parent */ spin_lock(&target->d_parent->d_lock); spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); } else { BUG_ON(p == dentry); spin_lock(&old_parent->d_lock); if (p != target) spin_lock_nested(&target->d_parent->d_lock, DENTRY_D_LOCK_NESTED); } spin_lock_nested(&dentry->d_lock, 2); spin_lock_nested(&target->d_lock, 3); if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); d_wait = __d_lookup_unhash(target); } write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ if (!d_unhashed(dentry)) ___d_drop(dentry); if (!d_unhashed(target)) ___d_drop(target); /* ... and switch them in the tree */ dentry->d_parent = target->d_parent; if (!exchange) { copy_name(dentry, target); target->d_hash.pprev = NULL; dentry->d_parent->d_lockref.count++; if (dentry != old_parent) /* wasn't IS_ROOT */ WARN_ON(!--old_parent->d_lockref.count); } else { target->d_parent = old_parent; swap_names(dentry, target); if (!hlist_unhashed(&target->d_sib)) __hlist_del(&target->d_sib); hlist_add_head(&target->d_sib, &target->d_parent->d_children); __d_rehash(target); fsnotify_update_flags(target); } if (!hlist_unhashed(&dentry->d_sib)) __hlist_del(&dentry->d_sib); hlist_add_head(&dentry->d_sib, &dentry->d_parent->d_children); __d_rehash(dentry); fsnotify_update_flags(dentry); fscrypt_handle_d_move(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); if (dir) end_dir_add(dir, n, d_wait); if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); if (dentry != old_parent) spin_unlock(&old_parent->d_lock); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); } /* * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. See the locking * requirements for __d_move. */ void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); /* * d_exchange - exchange two dentries * @dentry1: first dentry * @dentry2: second dentry */ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) { write_seqlock(&rename_lock); WARN_ON(!dentry1->d_inode); WARN_ON(!dentry2->d_inode); WARN_ON(IS_ROOT(dentry1)); WARN_ON(IS_ROOT(dentry2)); __d_move(dentry1, dentry2, true); write_sequnlock(&rename_lock); } /** * d_ancestor - search for an ancestor * @p1: ancestor dentry * @p2: child dentry * * Returns the ancestor dentry of p2 which is a child of p1, if p1 is * an ancestor of p2, else NULL. */ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) { struct dentry *p; for (p = p2; !IS_ROOT(p); p = p->d_parent) { if (p->d_parent == p1) return p; } return NULL; } /* * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding * dentry->d_parent->d_inode->i_mutex, and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static int __d_unalias(struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL; struct rw_semaphore *m2 = NULL; int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; /* See lock_rename() */ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; if (!inode_trylock_shared(alias->d_parent->d_inode)) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: __d_move(alias, dentry, false); ret = 0; out_err: if (m2) up_read(m2); if (m1) mutex_unlock(m1); return ret; } /** * d_splice_alias - splice a disconnected dentry into the tree if one exists * @inode: the inode which may have a disconnected dentry * @dentry: a negative dentry which we want to point to the inode. * * If inode is a directory and has an IS_ROOT alias, then d_move that in * place of the given dentry and return it, else simply d_add the inode * to the dentry and return NULL. * * If a non-IS_ROOT directory is found, the filesystem is corrupt, and * we should error out: directories can't have multiple aliases. * * This is needed in the lookup routine of any filesystem that is exportable * (via knfsd) so that we can build dcache paths to directories effectively. * * If a dentry was found and moved, then it is returned. Otherwise NULL * is returned. This matches the expected return value of ->lookup. * * Cluster filesystems may call this function with a negative, hashed dentry. * In that case, we know that the inode will be a regular file, and also this * will only occur during atomic_open. So we need to check for the dentry * being already hashed only in the final case. */ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { if (IS_ERR(inode)) return ERR_CAST(inode); BUG_ON(!d_unhashed(dentry)); if (!inode) goto out; security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); if (unlikely(new)) { /* The reference to new ensures it remains an alias */ spin_unlock(&inode->i_lock); write_seqlock(&rename_lock); if (unlikely(d_ancestor(new, dentry))) { write_sequnlock(&rename_lock); dput(new); new = ERR_PTR(-ELOOP); pr_warn_ratelimited( "VFS: Lookup of '%s' in %s %s" " would have caused loop\n", dentry->d_name.name, inode->i_sb->s_type->name, inode->i_sb->s_id); } else if (!IS_ROOT(new)) { struct dentry *old_parent = dget(new->d_parent); int err = __d_unalias(dentry, new); write_sequnlock(&rename_lock); if (err) { dput(new); new = ERR_PTR(err); } dput(old_parent); } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); } iput(inode); return new; } } out: __d_add(dentry, inode); return NULL; } EXPORT_SYMBOL(d_splice_alias); /* * Test whether new_dentry is a subdirectory of old_dentry. * * Trivially implemented using the dcache structure */ /** * is_subdir - is new dentry a subdirectory of old_dentry * @new_dentry: new dentry * @old_dentry: old dentry * * Returns true if new_dentry is a subdirectory of the parent (at any depth). * Returns false otherwise. * Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { bool result; unsigned seq; if (new_dentry == old_dentry) return true; do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); /* * Need rcu_readlock to protect against the d_parent trashing * due to d_move */ rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = true; else result = false; rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); return result; } EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { struct dentry *root = data; if (dentry != root) { if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; dentry->d_lockref.count--; } } return D_WALK_CONTINUE; } void d_genocide(struct dentry *parent) { d_walk(parent, parent, d_genocide_kill); } void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; BUG_ON(dentry->d_name.name != dentry->d_iname || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); } EXPORT_SYMBOL(d_mark_tmpfile); void d_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; inode_dec_link_count(inode); d_mark_tmpfile(file, inode); d_instantiate(dentry, inode); } EXPORT_SYMBOL(d_tmpfile); static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) { if (!str) return 0; dhash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_EARLY | HASH_ZERO, &d_hash_shift, NULL, 0, 0); d_hash_shift = 32 - d_hash_shift; } static void __init dcache_init(void) { /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature * of the dcache. */ dentry_cache = KMEM_CACHE_USERCOPY(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, d_iname); /* Hash may have been set up in dcache_init_early */ if (!hashdist) return; dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_ZERO, &d_hash_shift, NULL, 0, 0); d_hash_shift = 32 - d_hash_shift; } /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __ro_after_init; EXPORT_SYMBOL(names_cachep); void __init vfs_caches_init_early(void) { int i; for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); dcache_init_early(); inode_init_early(); } void __init vfs_caches_init(void) { names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL); dcache_init(); inode_init(); files_init(); files_maxfiles_init(); mnt_init(); bdev_cache_init(); chrdev_init(); } |
1 1 1 1 2 2 1 1 1 3 3 2 3 4 4 4 3 4 4 4 3 2 2 1 3 3 27 6 5 1 4 1 1 2 2 2 2 1 1 1 3 3 7 2 2 13 13 14 5 2 5 2 5 3 3 3 3 6 6 6 3 3 2 2 2 2 2 2 1 5 5 3 1 1 1 1 1 1 1 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 80 80 80 84 4 80 80 80 80 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS emulation layer for the mixer interface * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/string.h> #include <linux/module.h> #include <linux/compat.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/control.h> #include <sound/info.h> #include <sound/mixer_oss.h> #include <linux/soundcard.h> #define OSS_ALSAEMULVER _SIOR ('M', 249, int) MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Mixer OSS emulation for ALSA."); MODULE_LICENSE("GPL"); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_MIXER); static int snd_mixer_oss_open(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_mixer_oss_file *fmixer; int err; err = nonseekable_open(inode, file); if (err < 0) return err; card = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_MIXER); if (card == NULL) return -ENODEV; if (card->mixer_oss == NULL) { snd_card_unref(card); return -ENODEV; } err = snd_card_file_add(card, file); if (err < 0) { snd_card_unref(card); return err; } fmixer = kzalloc(sizeof(*fmixer), GFP_KERNEL); if (fmixer == NULL) { snd_card_file_remove(card, file); snd_card_unref(card); return -ENOMEM; } fmixer->card = card; fmixer->mixer = card->mixer_oss; file->private_data = fmixer; if (!try_module_get(card->module)) { kfree(fmixer); snd_card_file_remove(card, file); snd_card_unref(card); return -EFAULT; } snd_card_unref(card); return 0; } static int snd_mixer_oss_release(struct inode *inode, struct file *file) { struct snd_mixer_oss_file *fmixer; if (file->private_data) { fmixer = file->private_data; module_put(fmixer->card->module); snd_card_file_remove(fmixer->card, file); kfree(fmixer); } return 0; } static int snd_mixer_oss_info(struct snd_mixer_oss_file *fmixer, mixer_info __user *_info) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct mixer_info info; memset(&info, 0, sizeof(info)); strscpy(info.id, mixer && mixer->id[0] ? mixer->id : card->driver, sizeof(info.id)); strscpy(info.name, mixer && mixer->name[0] ? mixer->name : card->mixername, sizeof(info.name)); info.modify_counter = card->mixer_oss_change_count; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_mixer_oss_info_obsolete(struct snd_mixer_oss_file *fmixer, _old_mixer_info __user *_info) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; _old_mixer_info info; memset(&info, 0, sizeof(info)); strscpy(info.id, mixer && mixer->id[0] ? mixer->id : card->driver, sizeof(info.id)); strscpy(info.name, mixer && mixer->name[0] ? mixer->name : card->mixername, sizeof(info.name)); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_mixer_oss_caps(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; if (mixer->get_recsrc && mixer->put_recsrc) result |= SOUND_CAP_EXCL_INPUT; return result; } static int snd_mixer_oss_devmask(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, chn; if (mixer == NULL) return -EIO; mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume || pslot->put_recsrc) result |= 1 << chn; } mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_stereodevs(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, chn; if (mixer == NULL) return -EIO; mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume && pslot->stereo) result |= 1 << chn; } mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ result = mixer->mask_recsrc; } else { struct snd_mixer_oss_slot *pslot; int chn; for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_recsrc) result |= 1 << chn; } } mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ unsigned int index; result = mixer->get_recsrc(fmixer, &index); if (result < 0) goto unlock; result = 1 << index; } else { struct snd_mixer_oss_slot *pslot; int chn; for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->get_recsrc) { int active = 0; pslot->get_recsrc(fmixer, pslot, &active); if (active) result |= 1 << chn; } } } mixer->oss_recsrc = result; unlock: mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsrc) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int chn, active; unsigned int index; int result = 0; if (mixer == NULL) return -EIO; mutex_lock(&mixer->reg_mutex); if (mixer->get_recsrc && mixer->put_recsrc) { /* exclusive input */ if (recsrc & ~mixer->oss_recsrc) recsrc &= ~mixer->oss_recsrc; mixer->put_recsrc(fmixer, ffz(~recsrc)); mixer->get_recsrc(fmixer, &index); result = 1 << index; } for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_recsrc) { active = (recsrc & (1 << chn)) ? 1 : 0; pslot->put_recsrc(fmixer, pslot, active); } } if (! result) { for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->get_recsrc) { active = 0; pslot->get_recsrc(fmixer, pslot, &active); if (active) result |= 1 << chn; } } } mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, left, right; if (mixer == NULL || slot > 30) return -EIO; mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; left = pslot->volume[0]; right = pslot->volume[1]; if (pslot->get_volume) result = pslot->get_volume(fmixer, pslot, &left, &right); if (!pslot->stereo) right = left; if (snd_BUG_ON(left < 0 || left > 100)) { result = -EIO; goto unlock; } if (snd_BUG_ON(right < 0 || right > 100)) { result = -EIO; goto unlock; } if (result >= 0) { pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); } unlock: mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, int slot, int volume) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, left = volume & 0xff, right = (volume >> 8) & 0xff; if (mixer == NULL || slot > 30) return -EIO; mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; if (left > 100) left = 100; if (right > 100) right = 100; if (!pslot->stereo) right = left; if (pslot->put_volume) result = pslot->put_volume(fmixer, pslot, left, right); if (result < 0) goto unlock; pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); unlock: mutex_unlock(&mixer->reg_mutex); return result; } static int snd_mixer_oss_ioctl1(struct snd_mixer_oss_file *fmixer, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; int __user *p = argp; int tmp; if (snd_BUG_ON(!fmixer)) return -ENXIO; if (((cmd >> 8) & 0xff) == 'M') { switch (cmd) { case SOUND_MIXER_INFO: return snd_mixer_oss_info(fmixer, argp); case SOUND_OLD_MIXER_INFO: return snd_mixer_oss_info_obsolete(fmixer, argp); case SOUND_MIXER_WRITE_RECSRC: if (get_user(tmp, p)) return -EFAULT; tmp = snd_mixer_oss_set_recsrc(fmixer, tmp); if (tmp < 0) return tmp; return put_user(tmp, p); case OSS_GETVERSION: return put_user(SNDRV_OSS_VERSION, p); case OSS_ALSAEMULVER: return put_user(1, p); case SOUND_MIXER_READ_DEVMASK: tmp = snd_mixer_oss_devmask(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_STEREODEVS: tmp = snd_mixer_oss_stereodevs(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_RECMASK: tmp = snd_mixer_oss_recmask(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_CAPS: tmp = snd_mixer_oss_caps(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_RECSRC: tmp = snd_mixer_oss_get_recsrc(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); } } if (cmd & SIOC_IN) { if (get_user(tmp, p)) return -EFAULT; tmp = snd_mixer_oss_set_volume(fmixer, cmd & 0xff, tmp); if (tmp < 0) return tmp; return put_user(tmp, p); } else if (cmd & SIOC_OUT) { tmp = snd_mixer_oss_get_volume(fmixer, cmd & 0xff); if (tmp < 0) return tmp; return put_user(tmp, p); } return -ENXIO; } static long snd_mixer_oss_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return snd_mixer_oss_ioctl1(file->private_data, cmd, arg); } int snd_mixer_oss_ioctl_card(struct snd_card *card, unsigned int cmd, unsigned long arg) { struct snd_mixer_oss_file fmixer; if (snd_BUG_ON(!card)) return -ENXIO; if (card->mixer_oss == NULL) return -ENXIO; memset(&fmixer, 0, sizeof(fmixer)); fmixer.card = card; fmixer.mixer = card->mixer_oss; return snd_mixer_oss_ioctl1(&fmixer, cmd, arg); } EXPORT_SYMBOL(snd_mixer_oss_ioctl_card); #ifdef CONFIG_COMPAT /* all compatible */ static long snd_mixer_oss_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return snd_mixer_oss_ioctl1(file->private_data, cmd, (unsigned long)compat_ptr(arg)); } #else #define snd_mixer_oss_ioctl_compat NULL #endif /* * REGISTRATION PART */ static const struct file_operations snd_mixer_oss_f_ops = { .owner = THIS_MODULE, .open = snd_mixer_oss_open, .release = snd_mixer_oss_release, .llseek = no_llseek, .unlocked_ioctl = snd_mixer_oss_ioctl, .compat_ioctl = snd_mixer_oss_ioctl_compat, }; /* * utilities */ static long snd_mixer_oss_conv(long val, long omin, long omax, long nmin, long nmax) { long orange = omax - omin, nrange = nmax - nmin; if (orange == 0) return 0; return DIV_ROUND_CLOSEST(nrange * (val - omin), orange) + nmin; } /* convert from alsa native to oss values (0-100) */ static long snd_mixer_oss_conv1(long val, long min, long max, int *old) { if (val == snd_mixer_oss_conv(*old, 0, 100, min, max)) return *old; return snd_mixer_oss_conv(val, min, max, 0, 100); } /* convert from oss to alsa native values */ static long snd_mixer_oss_conv2(long val, long min, long max) { return snd_mixer_oss_conv(val, 0, 100, min, max); } #if 0 static void snd_mixer_oss_recsrce_set(struct snd_card *card, int slot) { struct snd_mixer_oss *mixer = card->mixer_oss; if (mixer) mixer->mask_recsrc |= 1 << slot; } static int snd_mixer_oss_recsrce_get(struct snd_card *card, int slot) { struct snd_mixer_oss *mixer = card->mixer_oss; if (mixer && (mixer->mask_recsrc & (1 << slot))) return 1; return 0; } #endif #define SNDRV_MIXER_OSS_SIGNATURE 0x65999250 #define SNDRV_MIXER_OSS_ITEM_GLOBAL 0 #define SNDRV_MIXER_OSS_ITEM_GSWITCH 1 #define SNDRV_MIXER_OSS_ITEM_GROUTE 2 #define SNDRV_MIXER_OSS_ITEM_GVOLUME 3 #define SNDRV_MIXER_OSS_ITEM_PSWITCH 4 #define SNDRV_MIXER_OSS_ITEM_PROUTE 5 #define SNDRV_MIXER_OSS_ITEM_PVOLUME 6 #define SNDRV_MIXER_OSS_ITEM_CSWITCH 7 #define SNDRV_MIXER_OSS_ITEM_CROUTE 8 #define SNDRV_MIXER_OSS_ITEM_CVOLUME 9 #define SNDRV_MIXER_OSS_ITEM_CAPTURE 10 #define SNDRV_MIXER_OSS_ITEM_COUNT 11 #define SNDRV_MIXER_OSS_PRESENT_GLOBAL (1<<0) #define SNDRV_MIXER_OSS_PRESENT_GSWITCH (1<<1) #define SNDRV_MIXER_OSS_PRESENT_GROUTE (1<<2) #define SNDRV_MIXER_OSS_PRESENT_GVOLUME (1<<3) #define SNDRV_MIXER_OSS_PRESENT_PSWITCH (1<<4) #define SNDRV_MIXER_OSS_PRESENT_PROUTE (1<<5) #define SNDRV_MIXER_OSS_PRESENT_PVOLUME (1<<6) #define SNDRV_MIXER_OSS_PRESENT_CSWITCH (1<<7) #define SNDRV_MIXER_OSS_PRESENT_CROUTE (1<<8) #define SNDRV_MIXER_OSS_PRESENT_CVOLUME (1<<9) #define SNDRV_MIXER_OSS_PRESENT_CAPTURE (1<<10) struct slot { unsigned int signature; unsigned int present; unsigned int channels; unsigned int numid[SNDRV_MIXER_OSS_ITEM_COUNT]; unsigned int capture_item; const struct snd_mixer_oss_assign_table *assigned; unsigned int allocated: 1; }; #define ID_UNKNOWN ((unsigned int)-1) static struct snd_kcontrol *snd_mixer_oss_test_id(struct snd_mixer_oss *mixer, const char *name, int index) { struct snd_card *card = mixer->card; struct snd_ctl_elem_id id; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; strscpy(id.name, name, sizeof(id.name)); id.index = index; return snd_ctl_find_id_locked(card, &id); } static void snd_mixer_oss_get_volume1_vol(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int *left, int *right) { struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; if (numid == ID_UNKNOWN) return; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid_locked(card, numid); if (!kctl) { up_read(&card->controls_rwsem); return; } uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) goto __unalloc; if (kctl->info(kctl, uinfo)) goto __unalloc; if (kctl->get(kctl, uctl)) goto __unalloc; if (uinfo->type == SNDRV_CTL_ELEM_TYPE_BOOLEAN && uinfo->value.integer.min == 0 && uinfo->value.integer.max == 1) goto __unalloc; *left = snd_mixer_oss_conv1(uctl->value.integer.value[0], uinfo->value.integer.min, uinfo->value.integer.max, &pslot->volume[0]); if (uinfo->count > 1) *right = snd_mixer_oss_conv1(uctl->value.integer.value[1], uinfo->value.integer.min, uinfo->value.integer.max, &pslot->volume[1]); __unalloc: up_read(&card->controls_rwsem); kfree(uctl); kfree(uinfo); } static void snd_mixer_oss_get_volume1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int *left, int *right, int route) { struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; if (numid == ID_UNKNOWN) return; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid_locked(card, numid); if (!kctl) { up_read(&card->controls_rwsem); return; } uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) goto __unalloc; if (kctl->info(kctl, uinfo)) goto __unalloc; if (kctl->get(kctl, uctl)) goto __unalloc; if (!uctl->value.integer.value[0]) { *left = 0; if (uinfo->count == 1) *right = 0; } if (uinfo->count > 1 && !uctl->value.integer.value[route ? 3 : 1]) *right = 0; __unalloc: up_read(&card->controls_rwsem); kfree(uctl); kfree(uinfo); } static int snd_mixer_oss_get_volume1(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *left, int *right) { struct slot *slot = pslot->private_data; *left = *right = 100; if (slot->present & SNDRV_MIXER_OSS_PRESENT_PVOLUME) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GVOLUME) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GLOBAL) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GLOBAL], left, right); } if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } return 0; } static void snd_mixer_oss_put_volume1_vol(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int left, int right) { struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; int res; if (numid == ID_UNKNOWN) return; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid_locked(card, numid); if (!kctl) { up_read(&card->controls_rwsem); return; } uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) goto __unalloc; if (kctl->info(kctl, uinfo)) goto __unalloc; if (uinfo->type == SNDRV_CTL_ELEM_TYPE_BOOLEAN && uinfo->value.integer.min == 0 && uinfo->value.integer.max == 1) goto __unalloc; uctl->value.integer.value[0] = snd_mixer_oss_conv2(left, uinfo->value.integer.min, uinfo->value.integer.max); if (uinfo->count > 1) uctl->value.integer.value[1] = snd_mixer_oss_conv2(right, uinfo->value.integer.min, uinfo->value.integer.max); res = kctl->put(kctl, uctl); if (res < 0) goto __unalloc; if (res > 0) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); __unalloc: up_read(&card->controls_rwsem); kfree(uctl); kfree(uinfo); } static void snd_mixer_oss_put_volume1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int left, int right, int route) { struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; int res; if (numid == ID_UNKNOWN) return; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid_locked(card, numid); if (!kctl) { up_read(&card->controls_rwsem); return; } uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) goto __unalloc; if (kctl->info(kctl, uinfo)) goto __unalloc; if (uinfo->count > 1) { uctl->value.integer.value[0] = left > 0 ? 1 : 0; uctl->value.integer.value[route ? 3 : 1] = right > 0 ? 1 : 0; if (route) { uctl->value.integer.value[1] = uctl->value.integer.value[2] = 0; } } else { uctl->value.integer.value[0] = (left > 0 || right > 0) ? 1 : 0; } res = kctl->put(kctl, uctl); if (res < 0) goto __unalloc; if (res > 0) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); __unalloc: up_read(&card->controls_rwsem); kfree(uctl); kfree(uinfo); } static int snd_mixer_oss_put_volume1(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int left, int right) { struct slot *slot = pslot->private_data; if (slot->present & SNDRV_MIXER_OSS_PRESENT_PVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PVOLUME], left, right); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CVOLUME) snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GLOBAL) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GLOBAL], left, right); } if (left || right) { if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], left, right, 1); if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } else { if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } } return 0; } static int snd_mixer_oss_get_recsrc1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *active) { struct slot *slot = pslot->private_data; int left, right; left = right = 1; snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], &left, &right, 0); *active = (left || right) ? 1 : 0; return 0; } static int snd_mixer_oss_get_recsrc1_route(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *active) { struct slot *slot = pslot->private_data; int left, right; left = right = 1; snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], &left, &right, 1); *active = (left || right) ? 1 : 0; return 0; } static int snd_mixer_oss_put_recsrc1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int active) { struct slot *slot = pslot->private_data; snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], active, active, 0); return 0; } static int snd_mixer_oss_put_recsrc1_route(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int active) { struct slot *slot = pslot->private_data; snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], active, active, 1); return 0; } static int snd_mixer_oss_get_recsrc2(struct snd_mixer_oss_file *fmixer, unsigned int *active_index) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *pslot; struct slot *slot; struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; int err, idx; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) { err = -ENOMEM; goto __free_only; } down_read(&card->controls_rwsem); kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (! kctl) { err = -ENOENT; goto __unlock; } err = kctl->info(kctl, uinfo); if (err < 0) goto __unlock; err = kctl->get(kctl, uctl); if (err < 0) goto __unlock; for (idx = 0; idx < 32; idx++) { if (!(mixer->mask_recsrc & (1 << idx))) continue; pslot = &mixer->slots[idx]; slot = pslot->private_data; if (slot->signature != SNDRV_MIXER_OSS_SIGNATURE) continue; if (!(slot->present & SNDRV_MIXER_OSS_PRESENT_CAPTURE)) continue; if (slot->capture_item == uctl->value.enumerated.item[0]) { *active_index = idx; break; } } err = 0; __unlock: up_read(&card->controls_rwsem); __free_only: kfree(uctl); kfree(uinfo); return err; } static int snd_mixer_oss_put_recsrc2(struct snd_mixer_oss_file *fmixer, unsigned int active_index) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *pslot; struct slot *slot = NULL; struct snd_ctl_elem_info *uinfo; struct snd_ctl_elem_value *uctl; int err; unsigned int idx; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) { err = -ENOMEM; goto __free_only; } down_read(&card->controls_rwsem); kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (! kctl) { err = -ENOENT; goto __unlock; } err = kctl->info(kctl, uinfo); if (err < 0) goto __unlock; for (idx = 0; idx < 32; idx++) { if (!(mixer->mask_recsrc & (1 << idx))) continue; pslot = &mixer->slots[idx]; slot = pslot->private_data; if (slot->signature != SNDRV_MIXER_OSS_SIGNATURE) continue; if (!(slot->present & SNDRV_MIXER_OSS_PRESENT_CAPTURE)) continue; if (idx == active_index) break; slot = NULL; } if (! slot) goto __unlock; for (idx = 0; idx < uinfo->count; idx++) uctl->value.enumerated.item[idx] = slot->capture_item; err = kctl->put(kctl, uctl); if (err > 0) snd_ctl_notify(fmixer->card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); err = 0; __unlock: up_read(&card->controls_rwsem); __free_only: kfree(uctl); kfree(uinfo); return err; } struct snd_mixer_oss_assign_table { int oss_id; const char *name; int index; }; static int snd_mixer_oss_build_test(struct snd_mixer_oss *mixer, struct slot *slot, const char *name, int index, int item) { struct snd_ctl_elem_info *info; struct snd_kcontrol *kcontrol; struct snd_card *card = mixer->card; int err; down_read(&card->controls_rwsem); kcontrol = snd_mixer_oss_test_id(mixer, name, index); if (kcontrol == NULL) { up_read(&card->controls_rwsem); return 0; } info = kmalloc(sizeof(*info), GFP_KERNEL); if (! info) { up_read(&card->controls_rwsem); return -ENOMEM; } err = kcontrol->info(kcontrol, info); if (err < 0) { up_read(&card->controls_rwsem); kfree(info); return err; } slot->numid[item] = kcontrol->id.numid; up_read(&card->controls_rwsem); if (info->count > slot->channels) slot->channels = info->count; slot->present |= 1 << item; kfree(info); return 0; } static void snd_mixer_oss_slot_free(struct snd_mixer_oss_slot *chn) { struct slot *p = chn->private_data; if (p) { if (p->allocated && p->assigned) { kfree_const(p->assigned->name); kfree_const(p->assigned); } kfree(p); } } static void mixer_slot_clear(struct snd_mixer_oss_slot *rslot) { int idx = rslot->number; /* remember this */ if (rslot->private_free) rslot->private_free(rslot); memset(rslot, 0, sizeof(*rslot)); rslot->number = idx; } /* In a separate function to keep gcc 3.2 happy - do NOT merge this in snd_mixer_oss_build_input! */ static int snd_mixer_oss_build_test_all(struct snd_mixer_oss *mixer, const struct snd_mixer_oss_assign_table *ptr, struct slot *slot) { char str[64]; int err; err = snd_mixer_oss_build_test(mixer, slot, ptr->name, ptr->index, SNDRV_MIXER_OSS_ITEM_GLOBAL); if (err) return err; sprintf(str, "%s Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GSWITCH); if (err) return err; sprintf(str, "%s Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GROUTE); if (err) return err; sprintf(str, "%s Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GVOLUME); if (err) return err; sprintf(str, "%s Playback Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PSWITCH); if (err) return err; sprintf(str, "%s Playback Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PROUTE); if (err) return err; sprintf(str, "%s Playback Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PVOLUME); if (err) return err; sprintf(str, "%s Capture Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_CSWITCH); if (err) return err; sprintf(str, "%s Capture Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_CROUTE); if (err) return err; sprintf(str, "%s Capture Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_CVOLUME); if (err) return err; return 0; } /* * build an OSS mixer element. * ptr_allocated means the entry is dynamically allocated (change via proc file). * when replace_old = 1, the old entry is replaced with the new one. */ static int snd_mixer_oss_build_input(struct snd_mixer_oss *mixer, const struct snd_mixer_oss_assign_table *ptr, int ptr_allocated, int replace_old) { struct slot slot; struct slot *pslot; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *rslot; char str[64]; /* check if already assigned */ if (mixer->slots[ptr->oss_id].get_volume && ! replace_old) return 0; memset(&slot, 0, sizeof(slot)); memset(slot.numid, 0xff, sizeof(slot.numid)); /* ID_UNKNOWN */ if (snd_mixer_oss_build_test_all(mixer, ptr, &slot)) return 0; down_read(&mixer->card->controls_rwsem); kctl = NULL; if (!ptr->index) kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (kctl) { struct snd_ctl_elem_info *uinfo; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); if (! uinfo) { up_read(&mixer->card->controls_rwsem); return -ENOMEM; } if (kctl->info(kctl, uinfo)) { up_read(&mixer->card->controls_rwsem); kfree(uinfo); return 0; } strcpy(str, ptr->name); if (!strcmp(str, "Master")) strcpy(str, "Mix"); if (!strcmp(str, "Master Mono")) strcpy(str, "Mix Mono"); slot.capture_item = 0; if (!strcmp(uinfo->value.enumerated.name, str)) { slot.present |= SNDRV_MIXER_OSS_PRESENT_CAPTURE; } else { for (slot.capture_item = 1; slot.capture_item < uinfo->value.enumerated.items; slot.capture_item++) { uinfo->value.enumerated.item = slot.capture_item; if (kctl->info(kctl, uinfo)) { up_read(&mixer->card->controls_rwsem); kfree(uinfo); return 0; } if (!strcmp(uinfo->value.enumerated.name, str)) { slot.present |= SNDRV_MIXER_OSS_PRESENT_CAPTURE; break; } } } kfree(uinfo); } up_read(&mixer->card->controls_rwsem); if (slot.present != 0) { pslot = kmalloc(sizeof(slot), GFP_KERNEL); if (! pslot) return -ENOMEM; *pslot = slot; pslot->signature = SNDRV_MIXER_OSS_SIGNATURE; pslot->assigned = ptr; pslot->allocated = ptr_allocated; rslot = &mixer->slots[ptr->oss_id]; mixer_slot_clear(rslot); rslot->stereo = slot.channels > 1 ? 1 : 0; rslot->get_volume = snd_mixer_oss_get_volume1; rslot->put_volume = snd_mixer_oss_put_volume1; /* note: ES18xx have both Capture Source and XX Capture Volume !!! */ if (slot.present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) { rslot->get_recsrc = snd_mixer_oss_get_recsrc1_sw; rslot->put_recsrc = snd_mixer_oss_put_recsrc1_sw; } else if (slot.present & SNDRV_MIXER_OSS_PRESENT_CROUTE) { rslot->get_recsrc = snd_mixer_oss_get_recsrc1_route; rslot->put_recsrc = snd_mixer_oss_put_recsrc1_route; } else if (slot.present & SNDRV_MIXER_OSS_PRESENT_CAPTURE) { mixer->mask_recsrc |= 1 << ptr->oss_id; } rslot->private_data = pslot; rslot->private_free = snd_mixer_oss_slot_free; return 1; } return 0; } #ifdef CONFIG_SND_PROC_FS /* */ #define MIXER_VOL(name) [SOUND_MIXER_##name] = #name static const char * const oss_mixer_names[SNDRV_OSS_MAX_MIXERS] = { MIXER_VOL(VOLUME), MIXER_VOL(BASS), MIXER_VOL(TREBLE), MIXER_VOL(SYNTH), MIXER_VOL(PCM), MIXER_VOL(SPEAKER), MIXER_VOL(LINE), MIXER_VOL(MIC), MIXER_VOL(CD), MIXER_VOL(IMIX), MIXER_VOL(ALTPCM), MIXER_VOL(RECLEV), MIXER_VOL(IGAIN), MIXER_VOL(OGAIN), MIXER_VOL(LINE1), MIXER_VOL(LINE2), MIXER_VOL(LINE3), MIXER_VOL(DIGITAL1), MIXER_VOL(DIGITAL2), MIXER_VOL(DIGITAL3), MIXER_VOL(PHONEIN), MIXER_VOL(PHONEOUT), MIXER_VOL(VIDEO), MIXER_VOL(RADIO), MIXER_VOL(MONITOR), }; /* * /proc interface */ static void snd_mixer_oss_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_mixer_oss *mixer = entry->private_data; int i; mutex_lock(&mixer->reg_mutex); for (i = 0; i < SNDRV_OSS_MAX_MIXERS; i++) { struct slot *p; if (! oss_mixer_names[i]) continue; p = (struct slot *)mixer->slots[i].private_data; snd_iprintf(buffer, "%s ", oss_mixer_names[i]); if (p && p->assigned) snd_iprintf(buffer, "\"%s\" %d\n", p->assigned->name, p->assigned->index); else snd_iprintf(buffer, "\"\" 0\n"); } mutex_unlock(&mixer->reg_mutex); } static void snd_mixer_oss_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_mixer_oss *mixer = entry->private_data; char line[128], str[32], idxstr[16]; const char *cptr; unsigned int idx; int ch; struct snd_mixer_oss_assign_table *tbl; struct slot *slot; while (!snd_info_get_line(buffer, line, sizeof(line))) { cptr = snd_info_get_str(str, line, sizeof(str)); for (ch = 0; ch < SNDRV_OSS_MAX_MIXERS; ch++) if (oss_mixer_names[ch] && strcmp(oss_mixer_names[ch], str) == 0) break; if (ch >= SNDRV_OSS_MAX_MIXERS) { pr_err("ALSA: mixer_oss: invalid OSS volume '%s'\n", str); continue; } cptr = snd_info_get_str(str, cptr, sizeof(str)); if (! *str) { /* remove the entry */ mutex_lock(&mixer->reg_mutex); mixer_slot_clear(&mixer->slots[ch]); mutex_unlock(&mixer->reg_mutex); continue; } snd_info_get_str(idxstr, cptr, sizeof(idxstr)); idx = simple_strtoul(idxstr, NULL, 10); if (idx >= 0x4000) { /* too big */ pr_err("ALSA: mixer_oss: invalid index %d\n", idx); continue; } mutex_lock(&mixer->reg_mutex); slot = (struct slot *)mixer->slots[ch].private_data; if (slot && slot->assigned && slot->assigned->index == idx && ! strcmp(slot->assigned->name, str)) /* not changed */ goto __unlock; tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); if (!tbl) goto __unlock; tbl->oss_id = ch; tbl->name = kstrdup(str, GFP_KERNEL); if (! tbl->name) { kfree(tbl); goto __unlock; } tbl->index = idx; if (snd_mixer_oss_build_input(mixer, tbl, 1, 1) <= 0) { kfree(tbl->name); kfree(tbl); } __unlock: mutex_unlock(&mixer->reg_mutex); } } static void snd_mixer_oss_proc_init(struct snd_mixer_oss *mixer) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(mixer->card, "oss_mixer", mixer->card->proc_root); if (! entry) return; entry->content = SNDRV_INFO_CONTENT_TEXT; entry->mode = S_IFREG | 0644; entry->c.text.read = snd_mixer_oss_proc_read; entry->c.text.write = snd_mixer_oss_proc_write; entry->private_data = mixer; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } mixer->proc_entry = entry; } static void snd_mixer_oss_proc_done(struct snd_mixer_oss *mixer) { snd_info_free_entry(mixer->proc_entry); mixer->proc_entry = NULL; } #else /* !CONFIG_SND_PROC_FS */ #define snd_mixer_oss_proc_init(mix) #define snd_mixer_oss_proc_done(mix) #endif /* CONFIG_SND_PROC_FS */ static void snd_mixer_oss_build(struct snd_mixer_oss *mixer) { static const struct snd_mixer_oss_assign_table table[] = { { SOUND_MIXER_VOLUME, "Master", 0 }, { SOUND_MIXER_VOLUME, "Front", 0 }, /* fallback */ { SOUND_MIXER_BASS, "Tone Control - Bass", 0 }, { SOUND_MIXER_TREBLE, "Tone Control - Treble", 0 }, { SOUND_MIXER_SYNTH, "Synth", 0 }, { SOUND_MIXER_SYNTH, "FM", 0 }, /* fallback */ { SOUND_MIXER_SYNTH, "Music", 0 }, /* fallback */ { SOUND_MIXER_PCM, "PCM", 0 }, { SOUND_MIXER_SPEAKER, "Beep", 0 }, { SOUND_MIXER_SPEAKER, "PC Speaker", 0 }, /* fallback */ { SOUND_MIXER_SPEAKER, "Speaker", 0 }, /* fallback */ { SOUND_MIXER_LINE, "Line", 0 }, { SOUND_MIXER_MIC, "Mic", 0 }, { SOUND_MIXER_CD, "CD", 0 }, { SOUND_MIXER_IMIX, "Monitor Mix", 0 }, { SOUND_MIXER_ALTPCM, "PCM", 1 }, { SOUND_MIXER_ALTPCM, "Headphone", 0 }, /* fallback */ { SOUND_MIXER_ALTPCM, "Wave", 0 }, /* fallback */ { SOUND_MIXER_RECLEV, "-- nothing --", 0 }, { SOUND_MIXER_IGAIN, "Capture", 0 }, { SOUND_MIXER_OGAIN, "Playback", 0 }, { SOUND_MIXER_LINE1, "Aux", 0 }, { SOUND_MIXER_LINE2, "Aux", 1 }, { SOUND_MIXER_LINE3, "Aux", 2 }, { SOUND_MIXER_DIGITAL1, "Digital", 0 }, { SOUND_MIXER_DIGITAL1, "IEC958", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL1, "IEC958 Optical", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL1, "IEC958 Coaxial", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL2, "Digital", 1 }, { SOUND_MIXER_DIGITAL3, "Digital", 2 }, { SOUND_MIXER_PHONEIN, "Phone", 0 }, { SOUND_MIXER_PHONEOUT, "Master Mono", 0 }, { SOUND_MIXER_PHONEOUT, "Speaker", 0 }, /*fallback*/ { SOUND_MIXER_PHONEOUT, "Mono", 0 }, /*fallback*/ { SOUND_MIXER_PHONEOUT, "Phone", 0 }, /* fallback */ { SOUND_MIXER_VIDEO, "Video", 0 }, { SOUND_MIXER_RADIO, "Radio", 0 }, { SOUND_MIXER_MONITOR, "Monitor", 0 } }; unsigned int idx; for (idx = 0; idx < ARRAY_SIZE(table); idx++) snd_mixer_oss_build_input(mixer, &table[idx], 0, 0); if (mixer->mask_recsrc) { mixer->get_recsrc = snd_mixer_oss_get_recsrc2; mixer->put_recsrc = snd_mixer_oss_put_recsrc2; } } /* * */ static int snd_mixer_oss_free1(void *private) { struct snd_mixer_oss *mixer = private; struct snd_card *card; int idx; if (!mixer) return 0; card = mixer->card; if (snd_BUG_ON(mixer != card->mixer_oss)) return -ENXIO; card->mixer_oss = NULL; for (idx = 0; idx < SNDRV_OSS_MAX_MIXERS; idx++) { struct snd_mixer_oss_slot *chn = &mixer->slots[idx]; if (chn->private_free) chn->private_free(chn); } kfree(mixer); return 0; } static int snd_mixer_oss_notify_handler(struct snd_card *card, int cmd) { struct snd_mixer_oss *mixer; if (cmd == SND_MIXER_OSS_NOTIFY_REGISTER) { int idx, err; mixer = kcalloc(2, sizeof(*mixer), GFP_KERNEL); if (mixer == NULL) return -ENOMEM; mutex_init(&mixer->reg_mutex); err = snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, card, 0, &snd_mixer_oss_f_ops, card); if (err < 0) { dev_err(card->dev, "unable to register OSS mixer device %i:%i\n", card->number, 0); kfree(mixer); return err; } mixer->oss_dev_alloc = 1; mixer->card = card; if (*card->mixername) strscpy(mixer->name, card->mixername, sizeof(mixer->name)); else snprintf(mixer->name, sizeof(mixer->name), "mixer%i", card->number); #ifdef SNDRV_OSS_INFO_DEV_MIXERS snd_oss_info_register(SNDRV_OSS_INFO_DEV_MIXERS, card->number, mixer->name); #endif for (idx = 0; idx < SNDRV_OSS_MAX_MIXERS; idx++) mixer->slots[idx].number = idx; card->mixer_oss = mixer; snd_mixer_oss_build(mixer); snd_mixer_oss_proc_init(mixer); } else { mixer = card->mixer_oss; if (mixer == NULL) return 0; if (mixer->oss_dev_alloc) { #ifdef SNDRV_OSS_INFO_DEV_MIXERS snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number); #endif snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0); mixer->oss_dev_alloc = 0; } if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) return 0; snd_mixer_oss_proc_done(mixer); return snd_mixer_oss_free1(mixer); } return 0; } static int __init alsa_mixer_oss_init(void) { struct snd_card *card; int idx; snd_mixer_oss_notify_callback = snd_mixer_oss_notify_handler; for (idx = 0; idx < SNDRV_CARDS; idx++) { card = snd_card_ref(idx); if (card) { snd_mixer_oss_notify_handler(card, SND_MIXER_OSS_NOTIFY_REGISTER); snd_card_unref(card); } } return 0; } static void __exit alsa_mixer_oss_exit(void) { struct snd_card *card; int idx; snd_mixer_oss_notify_callback = NULL; for (idx = 0; idx < SNDRV_CARDS; idx++) { card = snd_card_ref(idx); if (card) { snd_mixer_oss_notify_handler(card, SND_MIXER_OSS_NOTIFY_FREE); snd_card_unref(card); } } } module_init(alsa_mixer_oss_init) module_exit(alsa_mixer_oss_exit) |
9 46 9 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Authors: Lotsa people, from code originally in tcp */ #ifndef _INET6_HASHTABLES_H #define _INET6_HASHTABLES_H #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/types.h> #include <linux/jhash.h> #include <net/inet_sock.h> #include <net/ipv6.h> #include <net/netns/hash.h> struct inet_hashinfo; static inline unsigned int __inet6_ehashfn(const u32 lhash, const u16 lport, const u32 fhash, const __be16 fport, const u32 initval) { const u32 ports = (((u32)lport) << 16) | (__force u32)fport; return jhash_3words(lhash, fhash, ports, initval); } /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif); typedef u32 (inet6_ehashfn_t)(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); inet6_ehashfn_t inet6_ehashfn; INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, inet6_ehashfn_t *ehashfn); struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif); struct sock *inet6_lookup_run_sk_lookup(struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn); static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif, bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } static inline struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, bool *refcounted, inet6_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool prefetched; sk = skb_steal_sock(skb, refcounted, &prefetched); if (!sk) return NULL; if (!prefetched || !sk_fullsock(sk)) return sk; if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_state != TCP_LISTEN) return sk; } else if (sk->sk_protocol == IPPROTO_UDP) { if (sk->sk_state != TCP_CLOSE) return sk; } else { return sk; } reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, ntohs(dport), ehashfn); if (!reuse_sk) return sk; /* We've chosen a new reuseport sock which is never refcounted. This * implies that sk also isn't refcounted. */ WARN_ON_ONCE(*refcounted); return reuse_sk; } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif, int sdif, bool *refcounted) { struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct sock *sk; sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport, refcounted, inet6_ehashfn); if (IS_ERR(sk)) return NULL; if (sk) return sk; return __inet6_lookup(net, hashinfo, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, ntohs(dport), iif, sdif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); int inet6_hash(struct sock *sk); static inline bool inet6_match(struct net *net, const struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, const __portpair ports, const int dif, const int sdif) { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || sk->sk_portpair != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif); } #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _INET6_HASHTABLES_H */ |
3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_BUFFER_H #define _LINUX_TTY_BUFFER_H #include <linux/atomic.h> #include <linux/llist.h> #include <linux/mutex.h> #include <linux/workqueue.h> struct tty_buffer { union { struct tty_buffer *next; struct llist_node free; }; unsigned int used; unsigned int size; unsigned int commit; unsigned int lookahead; /* Lazy update on recv, can become less than "read" */ unsigned int read; bool flags; /* Data points here */ u8 data[] __aligned(sizeof(unsigned long)); }; static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs) { return b->data + ofs; } static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs) { return char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ atomic_t mem_used; /* In-use buffers excluding free list */ int mem_limit; struct tty_buffer *tail; /* Active buffer */ }; /* * When a break, frame error, or parity error happens, these codes are * stuffed into the flags buffer. */ #define TTY_NORMAL 0 #define TTY_BREAK 1 #define TTY_FRAME 2 #define TTY_PARITY 3 #define TTY_OVERRUN 4 #endif |
6257 279 411 63 13461 21 13447 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/export.h> #include <linux/fault-inject-usercopy.h> #include <linux/kasan-checks.h> #include <linux/thread_info.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> #include <asm/byteorder.h> #include <asm/word-at-a-time.h> #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #define IS_UNALIGNED(src, dst) 0 #else #define IS_UNALIGNED(src, dst) \ (((long) dst | (long) src) & (sizeof(long) - 1)) #endif /* * Do a strncpy, return length of string without final '\0'. * 'count' is the user-supplied count (return 'count' if we * hit it), 'max' is the address space maximum (and we return * -EFAULT if we hit it). */ static __always_inline long do_strncpy_from_user(char *dst, const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long res = 0; if (IS_UNALIGNED(src, dst)) goto byte_at_a_time; while (max >= sizeof(unsigned long)) { unsigned long c, data, mask; /* Fall back to byte-at-a-time if we get a page fault */ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); /* * Note that we mask out the bytes following the NUL. This is * important to do because string oblivious code may read past * the NUL. For those routines, we don't want to give them * potentially random bytes after the NUL in `src`. * * One example of such code is BPF map keys. BPF treats map keys * as an opaque set of bytes. Without the post-NUL mask, any BPF * maps keyed by strings returned from strncpy_from_user() may * have multiple entries for semantically identical strings. */ if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); mask = zero_bytemask(data); *(unsigned long *)(dst+res) = c & mask; return res + find_zero(data); } *(unsigned long *)(dst+res) = c; res += sizeof(unsigned long); max -= sizeof(unsigned long); } byte_at_a_time: while (max) { char c; unsafe_get_user(c,src+res, efault); dst[res] = c; if (!c) return res; res++; max--; } /* * Uhhuh. We hit 'max'. But was that the user-specified maximum * too? If so, that's ok - we got as much as the user asked for. */ if (res >= count) return res; /* * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's an EFAULT. */ efault: return -EFAULT; } /** * strncpy_from_user: - Copy a NUL terminated string from userspace. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @src: Source address, in user space. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from userspace to kernel space. * * On success, returns the length of the string (not including the trailing * NUL). * * If access to userspace fails, returns -EFAULT (some data may have been * copied). * * If @count is smaller than the length of the string, copies @count bytes * and returns @count. */ long strncpy_from_user(char *dst, const char __user *src, long count) { unsigned long max_addr, src_addr; might_fault(); if (should_fail_usercopy()) return -EFAULT; if (unlikely(count <= 0)) return 0; max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(src); if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; long retval; /* * Truncate 'max' to the user-specified limit, so that * we only have one limit we need to check in the loop */ if (max > count) max = count; kasan_check_write(dst, count); check_object_size(dst, count, false); if (user_read_access_begin(src, max)) { retval = do_strncpy_from_user(dst, src, count, max); user_read_access_end(); return retval; } } return -EFAULT; } EXPORT_SYMBOL(strncpy_from_user); |
4 18 20 43 9 106 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ #ifndef _EXFAT_FS_H #define _EXFAT_FS_H #include <linux/fs.h> #include <linux/ratelimit.h> #include <linux/nls.h> #include <linux/blkdev.h> #define EXFAT_ROOT_INO 1 #define EXFAT_CLUSTERS_UNTRACKED (~0u) /* * exfat error flags */ enum exfat_error_mode { EXFAT_ERRORS_CONT, /* ignore error and continue */ EXFAT_ERRORS_PANIC, /* panic on error */ EXFAT_ERRORS_RO, /* remount r/o on error */ }; /* * exfat nls lossy flag */ enum { NLS_NAME_NO_LOSSY = 0, /* no lossy */ NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */ NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */ }; #define EXFAT_HASH_BITS 8 #define EXFAT_HASH_SIZE (1UL << EXFAT_HASH_BITS) /* * Type Definitions */ #define ES_2_ENTRIES 2 #define ES_ALL_ENTRIES 0 #define ES_IDX_FILE 0 #define ES_IDX_STREAM 1 #define ES_IDX_FIRST_FILENAME 2 #define EXFAT_FILENAME_ENTRY_NUM(name_len) \ DIV_ROUND_UP(name_len, EXFAT_FILE_NAME_LEN) #define ES_IDX_LAST_FILENAME(name_len) \ (ES_IDX_FIRST_FILENAME + EXFAT_FILENAME_ENTRY_NUM(name_len) - 1) #define DIR_DELETED 0xFFFFFFF7 /* type values */ #define TYPE_UNUSED 0x0000 #define TYPE_DELETED 0x0001 #define TYPE_INVALID 0x0002 #define TYPE_CRITICAL_PRI 0x0100 #define TYPE_BITMAP 0x0101 #define TYPE_UPCASE 0x0102 #define TYPE_VOLUME 0x0103 #define TYPE_DIR 0x0104 #define TYPE_FILE 0x011F #define TYPE_CRITICAL_SEC 0x0200 #define TYPE_STREAM 0x0201 #define TYPE_EXTEND 0x0202 #define TYPE_ACL 0x0203 #define TYPE_BENIGN_PRI 0x0400 #define TYPE_GUID 0x0401 #define TYPE_PADDING 0x0402 #define TYPE_ACLTAB 0x0403 #define TYPE_BENIGN_SEC 0x0800 #define TYPE_VENDOR_EXT 0x0801 #define TYPE_VENDOR_ALLOC 0x0802 #define MAX_CHARSET_SIZE 6 /* max size of multi-byte character */ #define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ #define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE) #define EXFAT_HINT_NONE -1 #define EXFAT_MIN_SUBDIR 2 /* * helpers for cluster size to byte conversion. */ #define EXFAT_CLU_TO_B(b, sbi) ((b) << (sbi)->cluster_size_bits) #define EXFAT_B_TO_CLU(b, sbi) ((b) >> (sbi)->cluster_size_bits) #define EXFAT_B_TO_CLU_ROUND_UP(b, sbi) \ (((b - 1) >> (sbi)->cluster_size_bits) + 1) #define EXFAT_CLU_OFFSET(off, sbi) ((off) & ((sbi)->cluster_size - 1)) /* * helpers for block size to byte conversion. */ #define EXFAT_BLK_TO_B(b, sb) ((b) << (sb)->s_blocksize_bits) #define EXFAT_B_TO_BLK(b, sb) ((b) >> (sb)->s_blocksize_bits) #define EXFAT_B_TO_BLK_ROUND_UP(b, sb) \ (((b - 1) >> (sb)->s_blocksize_bits) + 1) #define EXFAT_BLK_OFFSET(off, sb) ((off) & ((sb)->s_blocksize - 1)) /* * helpers for block size to dentry size conversion. */ #define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS) #define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS) /* * helpers for cluster size to dentry size conversion. */ #define EXFAT_CLU_TO_DEN(clu, sbi) \ ((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) #define EXFAT_DEN_TO_CLU(dentry, sbi) \ ((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) /* * helpers for fat entry. */ #define FAT_ENT_SIZE (4) #define FAT_ENT_SIZE_BITS (2) #define FAT_ENT_OFFSET_SECTOR(sb, loc) (EXFAT_SB(sb)->FAT1_start_sector + \ (((u64)loc << FAT_ENT_SIZE_BITS) >> sb->s_blocksize_bits)) #define FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc) \ ((loc << FAT_ENT_SIZE_BITS) & (sb->s_blocksize - 1)) /* * helpers for bitmap. */ #define CLUSTER_TO_BITMAP_ENT(clu) ((clu) - EXFAT_RESERVED_CLUSTERS) #define BITMAP_ENT_TO_CLUSTER(ent) ((ent) + EXFAT_RESERVED_CLUSTERS) #define BITS_PER_SECTOR(sb) ((sb)->s_blocksize * BITS_PER_BYTE) #define BITS_PER_SECTOR_MASK(sb) (BITS_PER_SECTOR(sb) - 1) #define BITMAP_OFFSET_SECTOR_INDEX(sb, ent) \ ((ent / BITS_PER_BYTE) >> (sb)->s_blocksize_bits) #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb)) #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \ ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1)) #define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - (clu_base))) - 1) #define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1) /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */ #define ES_MAX_ENTRY_NUM ES_ENTRY_NUM(MAX_NAME_LENGTH) /* * 19 entries x 32 bytes/entry = 608 bytes. * The 608 bytes are in 3 sectors at most (even 512 Byte sector). */ #define DIR_CACHE_SIZE \ (DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1) struct exfat_dentry_namebuf { char *lfn; int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */ }; /* unicode name structure */ struct exfat_uni_name { /* +3 for null and for converting */ unsigned short name[MAX_NAME_LENGTH + 3]; u16 name_hash; unsigned char name_len; }; /* directory structure */ struct exfat_chain { unsigned int dir; unsigned int size; unsigned char flags; }; /* first empty entry hint information */ struct exfat_hint_femp { /* entry index of a directory */ int eidx; /* count of continuous empty entry */ int count; /* the cluster that first empty slot exists in */ struct exfat_chain cur; }; /* hint structure */ struct exfat_hint { unsigned int clu; union { unsigned int off; /* cluster offset */ int eidx; /* entry index */ }; }; struct exfat_entry_set_cache { struct super_block *sb; unsigned int start_off; int num_bh; struct buffer_head *__bh[DIR_CACHE_SIZE]; struct buffer_head **bh; unsigned int num_entries; bool modified; }; #define IS_DYNAMIC_ES(es) ((es)->__bh != (es)->bh) struct exfat_dir_entry { struct exfat_chain dir; int entry; unsigned int type; unsigned int start_clu; unsigned char flags; unsigned short attr; loff_t size; loff_t valid_size; unsigned int num_subdirs; struct timespec64 atime; struct timespec64 mtime; struct timespec64 crtime; struct exfat_dentry_namebuf namebuf; }; /* * exfat mount in-memory data */ struct exfat_mount_options { kuid_t fs_uid; kgid_t fs_gid; unsigned short fs_fmask; unsigned short fs_dmask; /* permission for setting the [am]time */ unsigned short allow_utime; /* charset for filename input/display */ char *iocharset; /* on error: continue, panic, remount-ro */ enum exfat_error_mode errors; unsigned utf8:1, /* Use of UTF-8 character set */ sys_tz:1, /* Use local timezone */ discard:1, /* Issue discard requests on deletions */ keep_last_dots:1; /* Keep trailing periods in paths */ int time_offset; /* Offset of timestamps from UTC (in minutes) */ /* Support creating zero-size directory, default: false */ bool zero_size_dir; }; /* * EXFAT file system superblock in-memory data */ struct exfat_sb_info { unsigned long long num_sectors; /* num of sectors in volume */ unsigned int num_clusters; /* num of clusters in volume */ unsigned int cluster_size; /* cluster size in bytes */ unsigned int cluster_size_bits; unsigned int sect_per_clus; /* cluster size in sectors */ unsigned int sect_per_clus_bits; unsigned long long FAT1_start_sector; /* FAT1 start sector */ unsigned long long FAT2_start_sector; /* FAT2 start sector */ unsigned long long data_start_sector; /* data area start sector */ unsigned int num_FAT_sectors; /* num of FAT sectors */ unsigned int root_dir; /* root dir cluster */ unsigned int dentries_per_clu; /* num of dentries per cluster */ unsigned int vol_flags; /* volume flags */ unsigned int vol_flags_persistent; /* volume flags to retain */ struct buffer_head *boot_bh; /* buffer_head of BOOT sector */ unsigned int map_clu; /* allocation bitmap start cluster */ unsigned int map_sectors; /* num of allocation bitmap sectors */ struct buffer_head **vol_amap; /* allocation bitmap */ unsigned short *vol_utbl; /* upcase table */ unsigned int clu_srch_ptr; /* cluster search pointer */ unsigned int used_clusters; /* number of used clusters */ struct mutex s_lock; /* superblock lock */ struct mutex bitmap_lock; /* bitmap lock */ struct exfat_mount_options options; struct nls_table *nls_io; /* Charset used for input and display */ struct ratelimit_state ratelimit; spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; struct rcu_head rcu; }; #define EXFAT_CACHE_VALID 0 /* * EXFAT file system inode in-memory data */ struct exfat_inode_info { struct exfat_chain dir; int entry; unsigned int type; unsigned short attr; unsigned int start_clu; unsigned char flags; /* * the copy of low 32bit of i_version to check * the validation of hint_stat. */ unsigned int version; /* hint for cluster last accessed */ struct exfat_hint hint_bmap; /* hint for entry index we try to lookup next time */ struct exfat_hint hint_stat; /* hint for first empty entry */ struct exfat_hint_femp hint_femp; spinlock_t cache_lru_lock; struct list_head cache_lru; int nr_caches; /* for avoiding the race between alloc and free */ unsigned int cache_valid_id; /* * NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access. * physically allocated size. */ loff_t i_size_ondisk; /* block-aligned i_size (used in cont_write_begin) */ loff_t i_size_aligned; /* on-disk position of directory entry or 0 */ loff_t i_pos; loff_t valid_size; /* hash by i_location */ struct hlist_node i_hash_fat; /* protect bmap against truncate */ struct rw_semaphore truncate_lock; struct inode vfs_inode; /* File creation time */ struct timespec64 i_crtime; }; static inline struct exfat_sb_info *EXFAT_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct exfat_inode_info *EXFAT_I(struct inode *inode) { return container_of(inode, struct exfat_inode_info, vfs_inode); } /* * If ->i_mode can't hold 0222 (i.e. ATTR_RO), we use ->i_attrs to * save ATTR_RO instead of ->i_mode. * * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only * bit, it's just used as flag for app. */ static inline int exfat_mode_can_hold_ro(struct inode *inode) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); if (S_ISDIR(inode->i_mode)) return 0; if ((~sbi->options.fs_fmask) & 0222) return 1; return 0; } /* Convert attribute bits and a mask to the UNIX mode. */ static inline mode_t exfat_make_mode(struct exfat_sb_info *sbi, unsigned short attr, mode_t mode) { if ((attr & EXFAT_ATTR_READONLY) && !(attr & EXFAT_ATTR_SUBDIR)) mode &= ~0222; if (attr & EXFAT_ATTR_SUBDIR) return (mode & ~sbi->options.fs_dmask) | S_IFDIR; return (mode & ~sbi->options.fs_fmask) | S_IFREG; } /* Return the FAT attribute byte for this inode */ static inline unsigned short exfat_make_attr(struct inode *inode) { unsigned short attr = EXFAT_I(inode)->attr; if (S_ISDIR(inode->i_mode)) attr |= EXFAT_ATTR_SUBDIR; if (exfat_mode_can_hold_ro(inode) && !(inode->i_mode & 0222)) attr |= EXFAT_ATTR_READONLY; return attr; } static inline void exfat_save_attr(struct inode *inode, unsigned short attr) { if (exfat_mode_can_hold_ro(inode)) EXFAT_I(inode)->attr = attr & (EXFAT_ATTR_RWMASK | EXFAT_ATTR_READONLY); else EXFAT_I(inode)->attr = attr & EXFAT_ATTR_RWMASK; } static inline bool exfat_is_last_sector_in_cluster(struct exfat_sb_info *sbi, sector_t sec) { return ((sec - sbi->data_start_sector + 1) & ((1 << sbi->sect_per_clus_bits) - 1)) == 0; } static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, unsigned int clus) { return ((sector_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) + sbi->data_start_sector; } static inline unsigned int exfat_sector_to_cluster(struct exfat_sb_info *sbi, sector_t sec) { return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) + EXFAT_RESERVED_CLUSTERS; } static inline bool is_valid_cluster(struct exfat_sb_info *sbi, unsigned int clus) { return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; } /* super.c */ int exfat_set_volume_dirty(struct super_block *sb); int exfat_clear_volume_dirty(struct super_block *sb); /* fatent.c */ #define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, struct exfat_chain *p_chain, bool sync_bmap); int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain); int exfat_ent_get(struct super_block *sb, unsigned int loc, unsigned int *content); int exfat_ent_set(struct super_block *sb, unsigned int loc, unsigned int content); int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir, int entry, struct exfat_dentry *p_entry); int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain, unsigned int len); int exfat_zeroed_cluster(struct inode *dir, unsigned int clu); int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, unsigned int *ret_clu); int exfat_count_num_clusters(struct super_block *sb, struct exfat_chain *p_chain, unsigned int *ret_count); /* balloc.c */ int exfat_load_bitmap(struct super_block *sb); void exfat_free_bitmap(struct exfat_sb_info *sbi); int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); /* file.c */ extern const struct file_operations exfat_file_operations; int __exfat_truncate(struct inode *inode); void exfat_truncate(struct inode *inode); int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, unsigned int request_mask, unsigned int query_flags); int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long exfat_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); /* namei.c */ extern const struct dentry_operations exfat_dentry_ops; extern const struct dentry_operations exfat_utf8_dentry_ops; /* cache.c */ int exfat_cache_init(void); void exfat_cache_shutdown(void); void exfat_cache_inval_inode(struct inode *inode); int exfat_get_cluster(struct inode *inode, unsigned int cluster, unsigned int *fclus, unsigned int *dclus, unsigned int *last_dclus, int allow_eof); /* dir.c */ extern const struct inode_operations exfat_dir_inode_operations; extern const struct file_operations exfat_dir_operations; unsigned int exfat_get_entry_type(struct exfat_dentry *p_entry); int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, int entry, unsigned int type, unsigned int start_clu, unsigned long long size); int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, int entry, int num_entries, struct exfat_uni_name *p_uniname); int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, int entry, int order, int num_entries); int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, int entry); void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es); int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, struct exfat_hint *hint_opt); int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); struct exfat_dentry *exfat_get_dentry(struct super_block *sb, struct exfat_chain *p_dir, int entry, struct buffer_head **bh); struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); int exfat_get_dentry_set(struct exfat_entry_set_cache *es, struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned int type); int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ extern const struct inode_operations exfat_file_inode_operations; void exfat_sync_inode(struct inode *inode); struct inode *exfat_build_inode(struct super_block *sb, struct exfat_dir_entry *info, loff_t i_pos); void exfat_hash_inode(struct inode *inode, loff_t i_pos); void exfat_unhash_inode(struct inode *inode); struct inode *exfat_iget(struct super_block *sb, loff_t i_pos); int __exfat_write_inode(struct inode *inode, int sync); int exfat_write_inode(struct inode *inode, struct writeback_control *wbc); void exfat_evict_inode(struct inode *inode); int exfat_block_truncate_page(struct inode *inode, loff_t from); /* exfat/nls.c */ unsigned short exfat_toupper(struct super_block *sb, unsigned short a); int exfat_uniname_ncmp(struct super_block *sb, unsigned short *a, unsigned short *b, unsigned int len); int exfat_utf16_to_nls(struct super_block *sb, struct exfat_uni_name *uniname, unsigned char *p_cstring, int len); int exfat_nls_to_utf16(struct super_block *sb, const unsigned char *p_cstring, const int len, struct exfat_uni_name *uniname, int *p_lossy); int exfat_create_upcase_table(struct super_block *sb); void exfat_free_upcase_table(struct exfat_sb_info *sbi); /* exfat/misc.c */ void __exfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) __printf(3, 4) __cold; #define exfat_fs_error(sb, fmt, args...) \ __exfat_fs_error(sb, 1, fmt, ## args) #define exfat_fs_error_ratelimit(sb, fmt, args...) \ __exfat_fs_error(sb, __ratelimit(&EXFAT_SB(sb)->ratelimit), \ fmt, ## args) /* expand to pr_*() with prefix */ #define exfat_err(sb, fmt, ...) \ pr_err("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) #define exfat_warn(sb, fmt, ...) \ pr_warn("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) #define exfat_info(sb, fmt, ...) \ pr_info("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) #define exfat_debug(sb, fmt, ...) \ pr_debug("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 tz, __le16 time, __le16 date, u8 time_cs); void exfat_truncate_atime(struct timespec64 *ts); void exfat_truncate_inode_atime(struct inode *inode); void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); void exfat_update_bh(struct buffer_head *bh, int sync); int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); #endif /* !_EXFAT_FS_H */ |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_TC_WRAPPER_H #define __NET_TC_WRAPPER_H #include <net/pkt_cls.h> #if IS_ENABLED(CONFIG_RETPOLINE) #include <linux/cpufeature.h> #include <linux/static_key.h> #include <linux/indirect_call_wrapper.h> #define TC_INDIRECT_SCOPE extern struct static_key_false tc_skip_wrapper; /* TC Actions */ #ifdef CONFIG_NET_CLS_ACT #define TC_INDIRECT_ACTION_DECLARE(fname) \ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ const struct tc_action *a, \ struct tcf_result *res)) TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act); TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act); TC_INDIRECT_ACTION_DECLARE(tcf_csum_act); TC_INDIRECT_ACTION_DECLARE(tcf_ct_act); TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act); TC_INDIRECT_ACTION_DECLARE(tcf_gact_act); TC_INDIRECT_ACTION_DECLARE(tcf_gate_act); TC_INDIRECT_ACTION_DECLARE(tcf_ife_act); TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act); TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act); TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act); TC_INDIRECT_ACTION_DECLARE(tcf_nat_act); TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act); TC_INDIRECT_ACTION_DECLARE(tcf_police_act); TC_INDIRECT_ACTION_DECLARE(tcf_sample_act); TC_INDIRECT_ACTION_DECLARE(tcf_simp_act); TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act); TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act); TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act); TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { if (static_branch_likely(&tc_skip_wrapper)) goto skip; #if IS_BUILTIN(CONFIG_NET_ACT_GACT) if (a->ops->act == tcf_gact_act) return tcf_gact_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_MIRRED) if (a->ops->act == tcf_mirred_act) return tcf_mirred_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_PEDIT) if (a->ops->act == tcf_pedit_act) return tcf_pedit_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) if (a->ops->act == tcf_skbedit_act) return tcf_skbedit_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) if (a->ops->act == tcf_skbmod_act) return tcf_skbmod_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_POLICE) if (a->ops->act == tcf_police_act) return tcf_police_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_BPF) if (a->ops->act == tcf_bpf_act) return tcf_bpf_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) if (a->ops->act == tcf_connmark_act) return tcf_connmark_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CSUM) if (a->ops->act == tcf_csum_act) return tcf_csum_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CT) if (a->ops->act == tcf_ct_act) return tcf_ct_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_CTINFO) if (a->ops->act == tcf_ctinfo_act) return tcf_ctinfo_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_GATE) if (a->ops->act == tcf_gate_act) return tcf_gate_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_MPLS) if (a->ops->act == tcf_mpls_act) return tcf_mpls_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_NAT) if (a->ops->act == tcf_nat_act) return tcf_nat_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) if (a->ops->act == tunnel_key_act) return tunnel_key_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_VLAN) if (a->ops->act == tcf_vlan_act) return tcf_vlan_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_IFE) if (a->ops->act == tcf_ife_act) return tcf_ife_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SIMP) if (a->ops->act == tcf_simp_act) return tcf_simp_act(skb, a, res); #endif #if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE) if (a->ops->act == tcf_sample_act) return tcf_sample_act(skb, a, res); #endif skip: return a->ops->act(skb, a, res); } #endif /* CONFIG_NET_CLS_ACT */ /* TC Filters */ #ifdef CONFIG_NET_CLS #define TC_INDIRECT_FILTER_DECLARE(fname) \ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ const struct tcf_proto *tp, \ struct tcf_result *res)) TC_INDIRECT_FILTER_DECLARE(basic_classify); TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify); TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify); TC_INDIRECT_FILTER_DECLARE(fl_classify); TC_INDIRECT_FILTER_DECLARE(flow_classify); TC_INDIRECT_FILTER_DECLARE(fw_classify); TC_INDIRECT_FILTER_DECLARE(mall_classify); TC_INDIRECT_FILTER_DECLARE(route4_classify); TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { if (static_branch_likely(&tc_skip_wrapper)) goto skip; #if IS_BUILTIN(CONFIG_NET_CLS_BPF) if (tp->classify == cls_bpf_classify) return cls_bpf_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_U32) if (tp->classify == u32_classify) return u32_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FLOWER) if (tp->classify == fl_classify) return fl_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FW) if (tp->classify == fw_classify) return fw_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) if (tp->classify == mall_classify) return mall_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_BASIC) if (tp->classify == basic_classify) return basic_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) if (tp->classify == cls_cgroup_classify) return cls_cgroup_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_FLOW) if (tp->classify == flow_classify) return flow_classify(skb, tp, res); #endif #if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4) if (tp->classify == route4_classify) return route4_classify(skb, tp, res); #endif skip: return tp->classify(skb, tp, res); } #endif /* CONFIG_NET_CLS */ static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) static_branch_enable(&tc_skip_wrapper); #endif } #else #define TC_INDIRECT_SCOPE static static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { return a->ops->act(skb, a, res); } static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { return tp->classify(skb, tp, res); } static inline void tc_wrapper_init(void) { } #endif #endif /* __NET_TC_WRAPPER_H */ |
1 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2022-2023 NXP */ #include "common.h" #include "netlink.h" struct mm_req_info { struct ethnl_req_info base; }; struct mm_reply_data { struct ethnl_reply_data base; struct ethtool_mm_state state; struct ethtool_mm_stats stats; }; #define MM_REPDATA(__reply_base) \ container_of(__reply_base, struct mm_reply_data, base) #define ETHTOOL_MM_STAT_CNT \ (__ETHTOOL_A_MM_STAT_CNT - (ETHTOOL_A_MM_STAT_PAD + 1)) const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = { [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static int mm_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct mm_reply_data *data = MM_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const struct ethtool_ops *ops; int ret; ops = dev->ethtool_ops; if (!ops->get_mm) return -EOPNOTSUPP; ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = ops->get_mm(dev, &data->state); if (ret) goto out_complete; if (ops->get_mm_stats && (req_base->flags & ETHTOOL_FLAG_STATS)) ops->get_mm_stats(dev, &data->stats); out_complete: ethnl_ops_complete(dev); return ret; } static int mm_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { int len = 0; len += nla_total_size(sizeof(u8)); /* _MM_PMAC_ENABLED */ len += nla_total_size(sizeof(u8)); /* _MM_TX_ENABLED */ len += nla_total_size(sizeof(u8)); /* _MM_TX_ACTIVE */ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_ENABLED */ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_STATUS */ len += nla_total_size(sizeof(u32)); /* _MM_VERIFY_TIME */ len += nla_total_size(sizeof(u32)); /* _MM_MAX_VERIFY_TIME */ len += nla_total_size(sizeof(u32)); /* _MM_TX_MIN_FRAG_SIZE */ len += nla_total_size(sizeof(u32)); /* _MM_RX_MIN_FRAG_SIZE */ if (req_base->flags & ETHTOOL_FLAG_STATS) len += nla_total_size(0) + /* _MM_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_MM_STAT_CNT; return len; } static int mm_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) { if (val == ETHTOOL_STAT_NOT_SET) return 0; if (nla_put_u64_64bit(skb, attrtype, val, ETHTOOL_A_MM_STAT_PAD)) return -EMSGSIZE; return 0; } static int mm_put_stats(struct sk_buff *skb, const struct ethtool_mm_stats *stats) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_MM_STATS); if (!nest) return -EMSGSIZE; if (mm_put_stat(skb, stats->MACMergeFrameAssErrorCount, ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) || mm_put_stat(skb, stats->MACMergeFrameSmdErrorCount, ETHTOOL_A_MM_STAT_SMD_ERRORS) || mm_put_stat(skb, stats->MACMergeFrameAssOkCount, ETHTOOL_A_MM_STAT_REASSEMBLY_OK) || mm_put_stat(skb, stats->MACMergeFragCountRx, ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) || mm_put_stat(skb, stats->MACMergeFragCountTx, ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) || mm_put_stat(skb, stats->MACMergeHoldCount, ETHTOOL_A_MM_STAT_HOLD_COUNT)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int mm_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct mm_reply_data *data = MM_REPDATA(reply_base); const struct ethtool_mm_state *state = &data->state; if (nla_put_u8(skb, ETHTOOL_A_MM_TX_ENABLED, state->tx_enabled) || nla_put_u8(skb, ETHTOOL_A_MM_TX_ACTIVE, state->tx_active) || nla_put_u8(skb, ETHTOOL_A_MM_PMAC_ENABLED, state->pmac_enabled) || nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_ENABLED, state->verify_enabled) || nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_STATUS, state->verify_status) || nla_put_u32(skb, ETHTOOL_A_MM_VERIFY_TIME, state->verify_time) || nla_put_u32(skb, ETHTOOL_A_MM_MAX_VERIFY_TIME, state->max_verify_time) || nla_put_u32(skb, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, state->tx_min_frag_size) || nla_put_u32(skb, ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, state->rx_min_frag_size)) return -EMSGSIZE; if (req_base->flags & ETHTOOL_FLAG_STATS && mm_put_stats(skb, &data->stats)) return -EMSGSIZE; return 0; } const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1] = { [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_MM_VERIFY_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_MM_VERIFY_TIME] = NLA_POLICY_RANGE(NLA_U32, 1, 128), [ETHTOOL_A_MM_TX_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_MM_PMAC_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = NLA_POLICY_RANGE(NLA_U32, 60, 252), }; static void mm_state_to_cfg(const struct ethtool_mm_state *state, struct ethtool_mm_cfg *cfg) { /* We could also compare state->verify_status against * ETHTOOL_MM_VERIFY_STATUS_DISABLED, but state->verify_enabled * is more like an administrative state which should be seen in * ETHTOOL_MSG_MM_GET replies. For example, a port with verification * disabled might be in the ETHTOOL_MM_VERIFY_STATUS_INITIAL * if it's down. */ cfg->verify_enabled = state->verify_enabled; cfg->verify_time = state->verify_time; cfg->tx_enabled = state->tx_enabled; cfg->pmac_enabled = state->pmac_enabled; cfg->tx_min_frag_size = state->tx_min_frag_size; } static int ethnl_set_mm_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_mm && ops->set_mm ? 1 : -EOPNOTSUPP; } static int ethnl_set_mm(struct ethnl_req_info *req_info, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct net_device *dev = req_info->dev; struct ethtool_mm_state state = {}; struct nlattr **tb = info->attrs; struct ethtool_mm_cfg cfg = {}; bool mod = false; int ret; ret = dev->ethtool_ops->get_mm(dev, &state); if (ret) return ret; mm_state_to_cfg(&state, &cfg); ethnl_update_bool(&cfg.verify_enabled, tb[ETHTOOL_A_MM_VERIFY_ENABLED], &mod); ethnl_update_u32(&cfg.verify_time, tb[ETHTOOL_A_MM_VERIFY_TIME], &mod); ethnl_update_bool(&cfg.tx_enabled, tb[ETHTOOL_A_MM_TX_ENABLED], &mod); ethnl_update_bool(&cfg.pmac_enabled, tb[ETHTOOL_A_MM_PMAC_ENABLED], &mod); ethnl_update_u32(&cfg.tx_min_frag_size, tb[ETHTOOL_A_MM_TX_MIN_FRAG_SIZE], &mod); if (!mod) return 0; if (cfg.verify_time > state.max_verify_time) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MM_VERIFY_TIME], "verifyTime exceeds device maximum"); return -ERANGE; } if (cfg.verify_enabled && !cfg.tx_enabled) { NL_SET_ERR_MSG(extack, "Verification requires TX enabled"); return -EINVAL; } if (cfg.tx_enabled && !cfg.pmac_enabled) { NL_SET_ERR_MSG(extack, "TX enabled requires pMAC enabled"); return -EINVAL; } ret = dev->ethtool_ops->set_mm(dev, &cfg, extack); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_mm_request_ops = { .request_cmd = ETHTOOL_MSG_MM_GET, .reply_cmd = ETHTOOL_MSG_MM_GET_REPLY, .hdr_attr = ETHTOOL_A_MM_HEADER, .req_info_size = sizeof(struct mm_req_info), .reply_data_size = sizeof(struct mm_reply_data), .prepare_data = mm_prepare_data, .reply_size = mm_reply_size, .fill_reply = mm_fill_reply, .set_validate = ethnl_set_mm_validate, .set = ethnl_set_mm, .set_ntf_cmd = ETHTOOL_MSG_MM_NTF, }; /* Returns whether a given device supports the MAC merge layer * (has an eMAC and a pMAC). Must be called under rtnl_lock() and * ethnl_ops_begin(). */ bool __ethtool_dev_mm_supported(struct net_device *dev) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_mm_state state = {}; int ret = -EOPNOTSUPP; if (ops && ops->get_mm) ret = ops->get_mm(dev, &state); return !ret; } bool ethtool_dev_mm_supported(struct net_device *dev) { const struct ethtool_ops *ops = dev->ethtool_ops; bool supported; int ret; ASSERT_RTNL(); if (!ops) return false; ret = ethnl_ops_begin(dev); if (ret < 0) return false; supported = __ethtool_dev_mm_supported(dev); ethnl_ops_complete(dev); return supported; } EXPORT_SYMBOL_GPL(ethtool_dev_mm_supported); |
8 8 8 4 10 2 10 2 9 3 9 3 9 3 3 4 3 3 4 4 4 4 4 4 4 4 4 4 4 1 1 4 4 4 4 4 4 1 1 1 1 1 1 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 | // SPDX-License-Identifier: GPL-2.0 #include <linux/jhash.h> #include <linux/netfilter.h> #include <linux/rcupdate.h> #include <linux/rhashtable.h> #include <linux/vmalloc.h> #include <net/genetlink.h> #include <net/netns/generic.h> #include <uapi/linux/genetlink.h> #include "ila.h" struct ila_xlat_params { struct ila_params ip; int ifindex; }; struct ila_map { struct ila_xlat_params xp; struct rhash_head node; struct ila_map __rcu *next; struct rcu_head rcu; }; #define MAX_LOCKS 1024 #define LOCKS_PER_CPU 10 static int alloc_ila_locks(struct ila_net *ilan) { return alloc_bucket_spinlocks(&ilan->xlat.locks, &ilan->xlat.locks_mask, MAX_LOCKS, LOCKS_PER_CPU, GFP_KERNEL); } static u32 hashrnd __read_mostly; static __always_inline void __ila_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } static inline u32 ila_locator_hash(struct ila_locator loc) { u32 *v = (u32 *)loc.v32; __ila_hash_secret_init(); return jhash_2words(v[0], v[1], hashrnd); } static inline spinlock_t *ila_get_lock(struct ila_net *ilan, struct ila_locator loc) { return &ilan->xlat.locks[ila_locator_hash(loc) & ilan->xlat.locks_mask]; } static inline int ila_cmp_wildcards(struct ila_map *ila, struct ila_addr *iaddr, int ifindex) { return (ila->xp.ifindex && ila->xp.ifindex != ifindex); } static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *xp) { return (ila->xp.ifindex != xp->ifindex); } static int ila_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ila_map *ila = obj; return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key); } static inline int ila_order(struct ila_map *ila) { int score = 0; if (ila->xp.ifindex) score += 1 << 1; return score; } static const struct rhashtable_params rht_params = { .nelem_hint = 1024, .head_offset = offsetof(struct ila_map, node), .key_offset = offsetof(struct ila_map, xp.ip.locator_match), .key_len = sizeof(u64), /* identifier */ .max_size = 1048576, .min_size = 256, .automatic_shrinking = true, .obj_cmpfn = ila_cmpfn, }; static int parse_nl_config(struct genl_info *info, struct ila_xlat_params *xp) { memset(xp, 0, sizeof(*xp)); if (info->attrs[ILA_ATTR_LOCATOR]) xp->ip.locator.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR]); if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) xp->ip.locator_match.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR_MATCH]); if (info->attrs[ILA_ATTR_CSUM_MODE]) xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); else xp->ip.csum_mode = ILA_CSUM_NO_ACTION; if (info->attrs[ILA_ATTR_IDENT_TYPE]) xp->ip.ident_type = nla_get_u8( info->attrs[ILA_ATTR_IDENT_TYPE]); else xp->ip.ident_type = ILA_ATYPE_USE_FORMAT; if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); return 0; } /* Must be called with rcu readlock */ static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr, int ifindex, struct ila_net *ilan) { struct ila_map *ila; ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &iaddr->loc, rht_params); while (ila) { if (!ila_cmp_wildcards(ila, iaddr, ifindex)) return ila; ila = rcu_access_pointer(ila->next); } return NULL; } /* Must be called with rcu readlock */ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp, struct ila_net *ilan) { struct ila_map *ila; ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); while (ila) { if (!ila_cmp_params(ila, xp)) return ila; ila = rcu_access_pointer(ila->next); } return NULL; } static inline void ila_release(struct ila_map *ila) { kfree_rcu(ila, rcu); } static void ila_free_node(struct ila_map *ila) { struct ila_map *next; /* Assume rcu_readlock held */ while (ila) { next = rcu_access_pointer(ila->next); ila_release(ila); ila = next; } } static void ila_free_cb(void *ptr, void *arg) { ila_free_node((struct ila_map *)ptr); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int ila_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { ila_xlat_addr(skb, false); return NF_ACCEPT; } static const struct nf_hook_ops ila_nf_hook_ops[] = { { .hook = ila_nf_input, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = -1, }, }; static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head; spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; if (!ilan->xlat.hooks_registered) { /* We defer registering net hooks in the namespace until the * first mapping is added. */ err = nf_register_net_hooks(net, ila_nf_hook_ops, ARRAY_SIZE(ila_nf_hook_ops)); if (err) return err; ilan->xlat.hooks_registered = true; } ila = kzalloc(sizeof(*ila), GFP_KERNEL); if (!ila) return -ENOMEM; ila_init_saved_csum(&xp->ip); ila->xp = *xp; order = ila_order(ila); spin_lock(lock); head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); if (!head) { /* New entry for the rhash_table */ err = rhashtable_lookup_insert_fast(&ilan->xlat.rhash_table, &ila->node, rht_params); } else { struct ila_map *tila = head, *prev = NULL; do { if (!ila_cmp_params(tila, xp)) { err = -EEXIST; goto out; } if (order > ila_order(tila)) break; prev = tila; tila = rcu_dereference_protected(tila->next, lockdep_is_held(lock)); } while (tila); if (prev) { /* Insert in sub list of head */ RCU_INIT_POINTER(ila->next, tila); rcu_assign_pointer(prev->next, ila); } else { /* Make this ila new head */ RCU_INIT_POINTER(ila->next, head); err = rhashtable_replace_fast(&ilan->xlat.rhash_table, &head->node, &ila->node, rht_params); if (err) goto out; } } out: spin_unlock(lock); if (err) kfree(ila); return err; } static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head, *prev; spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = -ENOENT; spin_lock(lock); head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); ila = head; prev = NULL; while (ila) { if (ila_cmp_params(ila, xp)) { prev = ila; ila = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); continue; } err = 0; if (prev) { /* Not head, just delete from list */ rcu_assign_pointer(prev->next, ila->next); } else { /* It is the head. If there is something in the * sublist we need to make a new head. */ head = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); if (head) { /* Put first entry in the sublist into the * table */ err = rhashtable_replace_fast( &ilan->xlat.rhash_table, &ila->node, &head->node, rht_params); if (err) goto out; } else { /* Entry no longer used */ err = rhashtable_remove_fast( &ilan->xlat.rhash_table, &ila->node, rht_params); } } ila_release(ila); break; } out: spin_unlock(lock); return err; } int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params p; int err; err = parse_nl_config(info, &p); if (err) return err; return ila_add_mapping(net, &p); } int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params xp; int err; err = parse_nl_config(info, &xp); if (err) return err; ila_del_mapping(net, &xp); return 0; } static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan, struct ila_map *ila) { return ila_get_lock(ilan, ila->xp.ip.locator_match); } int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct rhashtable_iter iter; struct ila_map *ila; spinlock_t *lock; int ret = 0; rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter); rhashtable_walk_start(&iter); for (;;) { ila = rhashtable_walk_next(&iter); if (IS_ERR(ila)) { if (PTR_ERR(ila) == -EAGAIN) continue; ret = PTR_ERR(ila); goto done; } else if (!ila) { break; } lock = lock_from_ila_map(ilan, ila); spin_lock(lock); ret = rhashtable_remove_fast(&ilan->xlat.rhash_table, &ila->node, rht_params); if (!ret) ila_free_node(ila); spin_unlock(lock); if (ret) break; } done: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); return ret; } static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) { if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR, (__force u64)ila->xp.ip.locator.v64, ILA_ATTR_PAD) || nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH, (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) || nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type)) return -1; return 0; } static int ila_dump_info(struct ila_map *ila, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); if (!hdr) return -ENOMEM; if (ila_fill_info(ila, skb) < 0) goto nla_put_failure; genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct sk_buff *msg; struct ila_xlat_params xp; struct ila_map *ila; int ret; ret = parse_nl_config(info, &xp); if (ret) return ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; rcu_read_lock(); ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); } rcu_read_unlock(); if (ret < 0) goto out_free; return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); return ret; } struct ila_dump_iter { struct rhashtable_iter rhiter; int skip; }; int ila_xlat_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter); iter->skip = 0; cb->args[0] = (long)iter; return 0; } int ila_xlat_nl_dump_done(struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; rhashtable_walk_exit(&iter->rhiter); kfree(iter); return 0; } int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; int skip = iter->skip; struct ila_map *ila; int ret; rhashtable_walk_start(rhiter); /* Get first entry */ ila = rhashtable_walk_peek(rhiter); if (ila && !IS_ERR(ila) && skip) { /* Skip over visited entries */ while (ila && skip) { /* Skip over any ila entries in this list that we * have already dumped. */ ila = rcu_access_pointer(ila->next); skip--; } } skip = 0; for (;;) { if (IS_ERR(ila)) { ret = PTR_ERR(ila); if (ret == -EAGAIN) { /* Table has changed and iter has reset. Return * -EAGAIN to the application even if we have * written data to the skb. The application * needs to deal with this. */ goto out_ret; } else { break; } } else if (!ila) { ret = 0; break; } while (ila) { ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, ILA_CMD_GET); if (ret) goto out; skip++; ila = rcu_access_pointer(ila->next); } skip = 0; ila = rhashtable_walk_next(rhiter); } out: iter->skip = skip; ret = (skb->len ? : ret); out_ret: rhashtable_walk_stop(rhiter); return ret; } int ila_xlat_init_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); int err; err = alloc_ila_locks(ilan); if (err) return err; err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params); if (err) { free_bucket_spinlocks(ilan->xlat.locks); return err; } return 0; } void ila_xlat_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); free_bucket_spinlocks(ilan->xlat.locks); if (ilan->xlat.hooks_registered) nf_unregister_net_hooks(net, ila_nf_hook_ops, ARRAY_SIZE(ila_nf_hook_ops)); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); /* Assumes skb contains a valid IPv6 header that is pulled */ /* No check here that ILA type in the mapping matches what is in the * address. We assume that whatever sender gaves us can be translated. * The checksum mode however is relevant. */ rcu_read_lock(); ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila); rcu_read_unlock(); return 0; } |
2 2 2 2 2 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/preempt.h> #include <linux/smp.h> #include <linux/completion.h> #include <asm/msr.h> static void __rdmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; int this_cpu = raw_smp_processor_id(); if (rv->msrs) reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; rdmsr(rv->msr_no, reg->l, reg->h); } static void __wrmsr_on_cpu(void *info) { struct msr_info *rv = info; struct msr *reg; int this_cpu = raw_smp_processor_id(); if (rv->msrs) reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; wrmsr(rv->msr_no, reg->l, reg->h); } int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); *l = rv.reg.l; *h = rv.reg.h; return err; } EXPORT_SYMBOL(rdmsr_on_cpu); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); *q = rv.reg.q; return err; } EXPORT_SYMBOL(rdmsrl_on_cpu); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.l = l; rv.reg.h = h; err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); return err; } EXPORT_SYMBOL(wrmsr_on_cpu); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.q = q; err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); return err; } EXPORT_SYMBOL(wrmsrl_on_cpu); static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs, void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; memset(&rv, 0, sizeof(rv)); rv.msrs = msrs; rv.msr_no = msr_no; this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) msr_func(&rv); smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } /* rdmsr on a bunch of CPUs * * @mask: which CPUs * @msr_no: which MSR * @msrs: array of MSR values * */ void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); } EXPORT_SYMBOL(rdmsr_on_cpus); /* * wrmsr on a bunch of CPUs * * @mask: which CPUs * @msr_no: which MSR * @msrs: array of MSR values * */ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); struct msr_info_completion { struct msr_info msr; struct completion done; }; /* These "safe" variants are slower and should be used when the target MSR may not actually exist. */ static void __rdmsr_safe_on_cpu(void *info) { struct msr_info_completion *rv = info; rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h); complete(&rv->done); } static void __wrmsr_safe_on_cpu(void *info) { struct msr_info *rv = info; rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); } int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { struct msr_info_completion rv; call_single_data_t csd; int err; INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv); memset(&rv, 0, sizeof(rv)); init_completion(&rv.done); rv.msr.msr_no = msr_no; err = smp_call_function_single_async(cpu, &csd); if (!err) { wait_for_completion(&rv.done); err = rv.msr.err; } *l = rv.msr.reg.l; *h = rv.msr.reg.h; return err; } EXPORT_SYMBOL(rdmsr_safe_on_cpu); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.l = l; rv.reg.h = h; err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_on_cpu); int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { int err; struct msr_info rv; memset(&rv, 0, sizeof(rv)); rv.msr_no = msr_no; rv.reg.q = q; err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsrl_safe_on_cpu); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { u32 low, high; int err; err = rdmsr_safe_on_cpu(cpu, msr_no, &low, &high); *q = (u64)high << 32 | low; return err; } EXPORT_SYMBOL(rdmsrl_safe_on_cpu); /* * These variants are significantly slower, but allows control over * the entire 32-bit GPR set. */ static void __rdmsr_safe_regs_on_cpu(void *info) { struct msr_regs_info *rv = info; rv->err = rdmsr_safe_regs(rv->regs); } static void __wrmsr_safe_regs_on_cpu(void *info) { struct msr_regs_info *rv = info; rv->err = wrmsr_safe_regs(rv->regs); } int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; rv.regs = regs; rv.err = -EIO; err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; rv.regs = regs; rv.err = -EIO; err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); return err ? err : rv.err; } EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); |
2 3 2 2 2 8 3 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 | // SPDX-License-Identifier: GPL-2.0-only /* * VMware VMCI Driver * * Copyright (C) 2012 VMware, Inc. All rights reserved. */ #include <linux/vmw_vmci_defs.h> #include <linux/vmw_vmci_api.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/bug.h> #include "vmci_datagram.h" #include "vmci_resource.h" #include "vmci_context.h" #include "vmci_driver.h" #include "vmci_event.h" #include "vmci_route.h" /* * struct datagram_entry describes the datagram entity. It is used for datagram * entities created only on the host. */ struct datagram_entry { struct vmci_resource resource; u32 flags; bool run_delayed; vmci_datagram_recv_cb recv_cb; void *client_data; u32 priv_flags; }; struct delayed_datagram_info { struct datagram_entry *entry; struct work_struct work; bool in_dg_host_queue; /* msg and msg_payload must be together. */ struct vmci_datagram msg; u8 msg_payload[]; }; /* Number of in-flight host->host datagrams */ static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0); /* * Create a datagram entry given a handle pointer. */ static int dg_create_handle(u32 resource_id, u32 flags, u32 priv_flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { int result; u32 context_id; struct vmci_handle handle; struct datagram_entry *entry; if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) return VMCI_ERROR_INVALID_ARGS; if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) { context_id = VMCI_INVALID_ID; } else { context_id = vmci_get_context_id(); if (context_id == VMCI_INVALID_ID) return VMCI_ERROR_NO_RESOURCES; } handle = vmci_make_handle(context_id, resource_id); entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { pr_warn("Failed allocating memory for datagram entry\n"); return VMCI_ERROR_NO_MEM; } entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false; entry->flags = flags; entry->recv_cb = recv_cb; entry->client_data = client_data; entry->priv_flags = priv_flags; /* Make datagram resource live. */ result = vmci_resource_add(&entry->resource, VMCI_RESOURCE_TYPE_DATAGRAM, handle); if (result != VMCI_SUCCESS) { pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", handle.context, handle.resource, result); kfree(entry); return result; } *out_handle = vmci_resource_handle(&entry->resource); return VMCI_SUCCESS; } /* * Internal utility function with the same purpose as * vmci_datagram_get_priv_flags that also takes a context_id. */ static int vmci_datagram_get_priv_flags(u32 context_id, struct vmci_handle handle, u32 *priv_flags) { if (context_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; if (context_id == VMCI_HOST_CONTEXT_ID) { struct datagram_entry *src_entry; struct vmci_resource *resource; resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) return VMCI_ERROR_INVALID_ARGS; src_entry = container_of(resource, struct datagram_entry, resource); *priv_flags = src_entry->priv_flags; vmci_resource_put(resource); } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID) *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS; else *priv_flags = vmci_context_get_priv_flags(context_id); return VMCI_SUCCESS; } /* * Calls the specified callback in a delayed context. */ static void dg_delayed_dispatch(struct work_struct *work) { struct delayed_datagram_info *dg_info = container_of(work, struct delayed_datagram_info, work); dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg); vmci_resource_put(&dg_info->entry->resource); if (dg_info->in_dg_host_queue) atomic_dec(&delayed_dg_host_queue_size); kfree(dg_info); } /* * Dispatch datagram as a host, to the host, or other vm context. This * function cannot dispatch to hypervisor context handlers. This should * have been handled before we get here by vmci_datagram_dispatch. * Returns number of bytes sent on success, error code otherwise. */ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) { int retval; size_t dg_size; u32 src_priv_flags; dg_size = VMCI_DG_SIZE(dg); /* Host cannot send to the hypervisor. */ if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID) return VMCI_ERROR_DST_UNREACHABLE; /* Check that source handle matches sending context. */ if (dg->src.context != context_id) { pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n", context_id, dg->src.context, dg->src.resource); return VMCI_ERROR_NO_ACCESS; } /* Get hold of privileges of sending endpoint. */ retval = vmci_datagram_get_priv_flags(context_id, dg->src, &src_priv_flags); if (retval != VMCI_SUCCESS) { pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n", dg->src.context, dg->src.resource); return retval; } /* Determine if we should route to host or guest destination. */ if (dg->dst.context == VMCI_HOST_CONTEXT_ID) { /* Route to host datagram entry. */ struct datagram_entry *dst_entry; struct vmci_resource *resource; if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && dg->dst.resource == VMCI_EVENT_HANDLER) { return vmci_event_dispatch(dg); } resource = vmci_resource_by_handle(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n", dg->dst.context, dg->dst.resource); return VMCI_ERROR_INVALID_RESOURCE; } dst_entry = container_of(resource, struct datagram_entry, resource); if (vmci_deny_interaction(src_priv_flags, dst_entry->priv_flags)) { vmci_resource_put(resource); return VMCI_ERROR_NO_ACCESS; } /* * If a VMCI datagram destined for the host is also sent by the * host, we always run it delayed. This ensures that no locks * are held when the datagram callback runs. */ if (dst_entry->run_delayed || dg->src.context == VMCI_HOST_CONTEXT_ID) { struct delayed_datagram_info *dg_info; if (atomic_add_return(1, &delayed_dg_host_queue_size) == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) { atomic_dec(&delayed_dg_host_queue_size); vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info = kmalloc(sizeof(*dg_info) + (size_t) dg->payload_size, GFP_ATOMIC); if (!dg_info) { atomic_dec(&delayed_dg_host_queue_size); vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info->in_dg_host_queue = true; dg_info->entry = dst_entry; memcpy(&dg_info->msg, dg, dg_size); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); retval = VMCI_SUCCESS; } else { retval = dst_entry->recv_cb(dst_entry->client_data, dg); vmci_resource_put(resource); if (retval < VMCI_SUCCESS) return retval; } } else { /* Route to destination VM context. */ struct vmci_datagram *new_dg; if (context_id != dg->dst.context) { if (vmci_deny_interaction(src_priv_flags, vmci_context_get_priv_flags (dg->dst.context))) { return VMCI_ERROR_NO_ACCESS; } else if (VMCI_CONTEXT_IS_VM(context_id)) { /* * If the sending context is a VM, it * cannot reach another VM. */ pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n", context_id, dg->dst.context); return VMCI_ERROR_DST_UNREACHABLE; } } /* We make a copy to enqueue. */ new_dg = kmemdup(dg, dg_size, GFP_KERNEL); if (new_dg == NULL) return VMCI_ERROR_NO_MEM; retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); if (retval < VMCI_SUCCESS) { kfree(new_dg); return retval; } } /* * We currently truncate the size to signed 32 bits. This doesn't * matter for this handler as it only support 4Kb messages. */ return (int)dg_size; } /* * Dispatch datagram as a guest, down through the VMX and potentially to * the host. * Returns number of bytes sent on success, error code otherwise. */ static int dg_dispatch_as_guest(struct vmci_datagram *dg) { int retval; struct vmci_resource *resource; resource = vmci_resource_by_handle(dg->src, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) return VMCI_ERROR_NO_HANDLE; retval = vmci_send_datagram(dg); vmci_resource_put(resource); return retval; } /* * Dispatch datagram. This will determine the routing for the datagram * and dispatch it accordingly. * Returns number of bytes sent on success, error code otherwise. */ int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg, bool from_guest) { int retval; enum vmci_route route; BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24); if (dg->payload_size > VMCI_MAX_DG_SIZE || VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) { pr_devel("Payload (size=%llu bytes) too big to send\n", (unsigned long long)dg->payload_size); return VMCI_ERROR_INVALID_ARGS; } retval = vmci_route(&dg->src, &dg->dst, from_guest, &route); if (retval < VMCI_SUCCESS) { pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n", dg->src.context, dg->dst.context, retval); return retval; } if (VMCI_ROUTE_AS_HOST == route) { if (VMCI_INVALID_ID == context_id) context_id = VMCI_HOST_CONTEXT_ID; return dg_dispatch_as_host(context_id, dg); } if (VMCI_ROUTE_AS_GUEST == route) return dg_dispatch_as_guest(dg); pr_warn("Unknown route (%d) for datagram\n", route); return VMCI_ERROR_DST_UNREACHABLE; } /* * Invoke the handler for the given datagram. This is intended to be * called only when acting as a guest and receiving a datagram from the * virtual device. */ int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) { struct vmci_resource *resource; struct datagram_entry *dst_entry; resource = vmci_resource_by_handle(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n", dg->dst.context, dg->dst.resource); return VMCI_ERROR_NO_HANDLE; } dst_entry = container_of(resource, struct datagram_entry, resource); if (dst_entry->run_delayed) { struct delayed_datagram_info *dg_info; dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size, GFP_ATOMIC); if (!dg_info) { vmci_resource_put(resource); return VMCI_ERROR_NO_MEM; } dg_info->in_dg_host_queue = false; dg_info->entry = dst_entry; memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); INIT_WORK(&dg_info->work, dg_delayed_dispatch); schedule_work(&dg_info->work); } else { dst_entry->recv_cb(dst_entry->client_data, dg); vmci_resource_put(resource); } return VMCI_SUCCESS; } /* * vmci_datagram_create_handle_priv() - Create host context datagram endpoint * @resource_id: The resource ID. * @flags: Datagram Flags. * @priv_flags: Privilege Flags. * @recv_cb: Callback when receiving datagrams. * @client_data: Pointer for a datagram_entry struct * @out_handle: vmci_handle that is populated as a result of this function. * * Creates a host context datagram endpoint and returns a handle to it. */ int vmci_datagram_create_handle_priv(u32 resource_id, u32 flags, u32 priv_flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { if (out_handle == NULL) return VMCI_ERROR_INVALID_ARGS; if (recv_cb == NULL) { pr_devel("Client callback needed when creating datagram\n"); return VMCI_ERROR_INVALID_ARGS; } if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) return VMCI_ERROR_INVALID_ARGS; return dg_create_handle(resource_id, flags, priv_flags, recv_cb, client_data, out_handle); } EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv); /* * vmci_datagram_create_handle() - Create host context datagram endpoint * @resource_id: Resource ID. * @flags: Datagram Flags. * @recv_cb: Callback when receiving datagrams. * @client_ata: Pointer for a datagram_entry struct * @out_handle: vmci_handle that is populated as a result of this function. * * Creates a host context datagram endpoint and returns a handle to * it. Same as vmci_datagram_create_handle_priv without the priviledge * flags argument. */ int vmci_datagram_create_handle(u32 resource_id, u32 flags, vmci_datagram_recv_cb recv_cb, void *client_data, struct vmci_handle *out_handle) { return vmci_datagram_create_handle_priv( resource_id, flags, VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, recv_cb, client_data, out_handle); } EXPORT_SYMBOL_GPL(vmci_datagram_create_handle); /* * vmci_datagram_destroy_handle() - Destroys datagram handle * @handle: vmci_handle to be destroyed and reaped. * * Use this function to destroy any datagram handles created by * vmci_datagram_create_handle{,Priv} functions. */ int vmci_datagram_destroy_handle(struct vmci_handle handle) { struct datagram_entry *entry; struct vmci_resource *resource; resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); if (!resource) { pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n", handle.context, handle.resource); return VMCI_ERROR_NOT_FOUND; } entry = container_of(resource, struct datagram_entry, resource); vmci_resource_put(&entry->resource); vmci_resource_remove(&entry->resource); kfree(entry); return VMCI_SUCCESS; } EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle); /* * vmci_datagram_send() - Send a datagram * @msg: The datagram to send. * * Sends the provided datagram on its merry way. */ int vmci_datagram_send(struct vmci_datagram *msg) { if (msg == NULL) return VMCI_ERROR_INVALID_ARGS; return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false); } EXPORT_SYMBOL_GPL(vmci_datagram_send); |
71 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | // SPDX-License-Identifier: GPL-2.0 /* RTT/RTO calculation. * * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) * * https://tools.ietf.org/html/rfc6298 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf */ #include <linux/net.h> #include "ar-internal.h" #define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) #define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) { return 200; } static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); } static u32 rxrpc_bound_rto(u32 rto) { return min(rto, RXRPC_RTO_MAX); } /* * Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 * piece by Van Jacobson. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) { long m = sample_rtt_us; /* RTT */ u32 srtt = peer->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. * This is designed to be as fast as possible * m stands for "measurement". * * On a 1990 paper the rto value is changed to: * RTO = rtt + 4 * mdev * * Funny. This algorithm seems to be very broken. * These formulae increase RTO, when it should be decreased, increase * too slowly, when it should be increased quickly, decrease too quickly * etc. I guess in BSD RTO takes ONE value, so that it is absolutely * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ if (srtt != 0) { m -= (srtt >> 3); /* m is now error in rtt est */ srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ m -= (peer->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain * for mdev in this case (alpha*beta). * Like Eifel it also prevents growth of rto, * but also it limits too fast rto decreases, * happening in pure Eifel. */ if (m > 0) m >>= 3; } else { m -= (peer->mdev_us >> 2); /* similar update on mdev */ } peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ if (peer->mdev_us > peer->mdev_max_us) { peer->mdev_max_us = peer->mdev_us; if (peer->mdev_max_us > peer->rttvar_us) peer->rttvar_us = peer->mdev_max_us; } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); peer->mdev_max_us = peer->rttvar_us; } peer->srtt_us = max(1U, srtt); } /* * Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ static void rxrpc_set_rto(struct rxrpc_peer *peer) { u32 rto; /* 1. If rtt variance happened to be less 50msec, it is hallucination. * It cannot be less due to utterly erratic ACK generation made * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ * to do with delayed acks, because at cwnd>2 true delack timeout * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ rto = __rxrpc_set_rto(peer); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo * guarantees that rto is higher. */ peer->rto_j = rxrpc_bound_rto(rto); } static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) { if (rtt_us < 0) return; //rxrpc_update_rtt_min(peer, rtt_us); rxrpc_rtt_estimator(peer, rtt_us); rxrpc_set_rto(peer); /* RFC6298: only reset backoff on valid RTT measurement. */ peer->backoff = 0; } /* * Add RTT information to cache. This is called in softirq mode and has * exclusive access to the peer RTT data. */ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time) { struct rxrpc_peer *peer = call->peer; s64 rtt_us; rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); if (rtt_us < 0) return; spin_lock(&peer->rtt_input_lock); rxrpc_ack_update_rtt(peer, rtt_us); if (peer->rtt_count < 3) peer->rtt_count++; spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, peer->srtt_us >> 3, peer->rto_j); } /* * Get the retransmission timeout to set in jiffies, backing it off each time * we retransmit. */ unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) { u64 timo_j; u8 backoff = READ_ONCE(peer->backoff); timo_j = peer->rto_j; timo_j <<= backoff; if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) WRITE_ONCE(peer->backoff, backoff + 1); if (timo_j < 1) timo_j = 1; return timo_j; } void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) { peer->rto_j = RXRPC_TIMEOUT_INIT; peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); peer->backoff = 0; //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); } |
6 76 76 76 39 39 8 74 45 26 76 78 18 76 8 70 8 67 76 76 75 8 68 26 22 4 26 76 76 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 | // SPDX-License-Identifier: GPL-2.0 #include "messages.h" #include "ctree.h" #include "delalloc-space.h" #include "block-rsv.h" #include "btrfs_inode.h" #include "space-info.h" #include "transaction.h" #include "qgroup.h" #include "block-group.h" #include "fs.h" /* * HOW DOES THIS WORK * * There are two stages to data reservations, one for data and one for metadata * to handle the new extents and checksums generated by writing data. * * * DATA RESERVATION * The general flow of the data reservation is as follows * * -> Reserve * We call into btrfs_reserve_data_bytes() for the user request bytes that * they wish to write. We make this reservation and add it to * space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree * for the range and carry on if this is buffered, or follow up trying to * make a real allocation if we are pre-allocating or doing O_DIRECT. * * -> Use * At writepages()/prealloc/O_DIRECT time we will call into * btrfs_reserve_extent() for some part or all of this range of bytes. We * will make the allocation and subtract space_info->bytes_may_use by the * original requested length and increase the space_info->bytes_reserved by * the allocated length. This distinction is important because compression * may allocate a smaller on disk extent than we previously reserved. * * -> Allocation * finish_ordered_io() will insert the new file extent item for this range, * and then add a delayed ref update for the extent tree. Once that delayed * ref is written the extent size is subtracted from * space_info->bytes_reserved and added to space_info->bytes_used. * * Error handling * * -> By the reservation maker * This is the simplest case, we haven't completed our operation and we know * how much we reserved, we can simply call * btrfs_free_reserved_data_space*() and it will be removed from * space_info->bytes_may_use. * * -> After the reservation has been made, but before cow_file_range() * This is specifically for the delalloc case. You must clear * EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will * be subtracted from space_info->bytes_may_use. * * METADATA RESERVATION * The general metadata reservation lifetimes are discussed elsewhere, this * will just focus on how it is used for delalloc space. * * We keep track of two things on a per inode bases * * ->outstanding_extents * This is the number of file extent items we'll need to handle all of the * outstanding DELALLOC space we have in this inode. We limit the maximum * size of an extent, so a large contiguous dirty area may require more than * one outstanding_extent, which is why count_max_extents() is used to * determine how many outstanding_extents get added. * * ->csum_bytes * This is essentially how many dirty bytes we have for this inode, so we * can calculate the number of checksum items we would have to add in order * to checksum our outstanding data. * * We keep a per-inode block_rsv in order to make it easier to keep track of * our reservation. We use btrfs_calculate_inode_block_rsv_size() to * calculate the current theoretical maximum reservation we would need for the * metadata for this inode. We call this and then adjust our reservation as * necessary, either by attempting to reserve more space, or freeing up excess * space. * * OUTSTANDING_EXTENTS HANDLING * * ->outstanding_extents is used for keeping track of how many extents we will * need to use for this inode, and it will fluctuate depending on where you are * in the life cycle of the dirty data. Consider the following normal case for * a completely clean inode, with a num_bytes < our maximum allowed extent size * * -> reserve * ->outstanding_extents += 1 (current value is 1) * * -> set_delalloc * ->outstanding_extents += 1 (current value is 2) * * -> btrfs_delalloc_release_extents() * ->outstanding_extents -= 1 (current value is 1) * * We must call this once we are done, as we hold our reservation for the * duration of our operation, and then assume set_delalloc will update the * counter appropriately. * * -> add ordered extent * ->outstanding_extents += 1 (current value is 2) * * -> btrfs_clear_delalloc_extent * ->outstanding_extents -= 1 (current value is 1) * * -> finish_ordered_io/btrfs_remove_ordered_extent * ->outstanding_extents -= 1 (current value is 0) * * Each stage is responsible for their own accounting of the extent, thus * making error handling and cleanup easier. */ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA; /* Make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, fs_info->sectorsize); if (btrfs_is_free_space_inode(inode)) flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE; return btrfs_reserve_data_bytes(fs_info, bytes, flush); } int btrfs_check_data_free_space(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len, bool noflush) { struct btrfs_fs_info *fs_info = inode->root->fs_info; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA; int ret; /* align the range */ len = round_up(start + len, fs_info->sectorsize) - round_down(start, fs_info->sectorsize); start = round_down(start, fs_info->sectorsize); if (noflush) flush = BTRFS_RESERVE_NO_FLUSH; else if (btrfs_is_free_space_inode(inode)) flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE; ret = btrfs_reserve_data_bytes(fs_info, len, flush); if (ret < 0) return ret; /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); if (ret < 0) { btrfs_free_reserved_data_space_noquota(fs_info, len); extent_changeset_free(*reserved); *reserved = NULL; } else { ret = 0; } return ret; } /* * Called if we need to clear a data reservation for this inode * Normally in a error case. * * This one will *NOT* use accurate qgroup reserved space API, just for case * which we can't sleep and is sure it won't affect qgroup reserved space. * Like clear_bit_hook(). */ void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info, u64 len) { struct btrfs_space_info *data_sinfo; ASSERT(IS_ALIGNED(len, fs_info->sectorsize)); data_sinfo = fs_info->data_sinfo; btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len); } /* * Called if we need to clear a data reservation for this inode * Normally in a error case. * * This one will handle the per-inode data rsv map for accurate reserved * space framework. */ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len) { struct btrfs_fs_info *fs_info = inode->root->fs_info; /* Make sure the range is aligned to sectorsize */ len = round_up(start + len, fs_info->sectorsize) - round_down(start, fs_info->sectorsize); start = round_down(start, fs_info->sectorsize); btrfs_free_reserved_data_space_noquota(fs_info, len); btrfs_qgroup_free_data(inode, reserved, start, len, NULL); } /* * Release any excessive reservations for an inode. * * @inode: the inode we need to release from * @qgroup_free: free or convert qgroup meta. Unlike normal operation, qgroup * meta reservation needs to know if we are freeing qgroup * reservation or just converting it into per-trans. Normally * @qgroup_free is true for error handling, and false for normal * release. * * This is the same as btrfs_block_rsv_release, except that it handles the * tracepoint for the reservation. */ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 released = 0; u64 qgroup_to_release = 0; /* * Since we statically set the block_rsv->size we just want to say we * are releasing 0 bytes, and then we'll just get the reservation over * the size free'd. */ released = btrfs_block_rsv_release(fs_info, block_rsv, 0, &qgroup_to_release); if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); if (qgroup_free) btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); else btrfs_qgroup_convert_reserved_meta(inode->root, qgroup_to_release); } static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); outstanding_extents = inode->outstanding_extents; /* * Insert size for the number of outstanding extents, 1 normal size for * updating the inode. */ if (outstanding_extents) { reserve_size = btrfs_calc_insert_metadata_size(fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } if (!(inode->flags & BTRFS_INODE_NODATASUM)) { u64 csum_leaves; csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent * * This is overestimating in most cases. */ qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; block_rsv->qgroup_rsv_size = qgroup_rsv_size; spin_unlock(&block_rsv->lock); } static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); if (inode->flags & BTRFS_INODE_NODATASUM) csum_leaves = 0; else csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); /* * finish_ordered_io has to update the inode, so add the space required * for an inode update. */ *meta_reserve += inode_update; *qgroup_reserve = nr_extents * fs_info->nodesize; } int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, bool noflush) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 meta_reserve, qgroup_reserve; unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; /* * If we are a free space inode we need to not flush since we will be in * the middle of a transaction commit. We also don't need the delalloc * mutex since we won't race with anybody. We need this mostly to make * lockdep shut its filthy mouth. * * If we have a transaction open (can happen if we call truncate_block * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ if (noflush || btrfs_is_free_space_inode(inode)) { flush = BTRFS_RESERVE_NO_FLUSH; } else { if (current->journal_info) flush = BTRFS_RESERVE_FLUSH_LIMIT; } num_bytes = ALIGN(num_bytes, fs_info->sectorsize); disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize); /* * We always want to do it this way, every other way is wrong and ends * in tears. Pre-reserving the amount we are going to add will always * be the right way, because otherwise if we have enough parallelism we * could end up with thousands of inodes all holding little bits of * reservations they were able to make previously and the only way to * reclaim that space is to ENOSPC out the operations and clear * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); if (ret) return ret; ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, meta_reserve, flush); if (ret) { btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); return ret; } /* * Now we need to update our outstanding extents and csum bytes _first_ * and then add the reservation to the block_rsv. This keeps us from * racing with an ordered completion or some such that would think it * needs to free the reservation we just made. */ nr_extents = count_max_extents(fs_info, num_bytes); spin_lock(&inode->lock); btrfs_mod_outstanding_extents(inode, nr_extents); if (!(inode->flags & BTRFS_INODE_NODATASUM)) inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); /* Now we can safely add our space to our block rsv */ btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), meta_reserve, 1); spin_lock(&block_rsv->lock); block_rsv->qgroup_rsv_reserved += qgroup_reserve; spin_unlock(&block_rsv->lock); return 0; } /* * Release a metadata reservation for an inode. * * @inode: the inode to release the reservation for. * @num_bytes: the number of bytes we are releasing. * @qgroup_free: free qgroup reservation or convert it to per-trans reservation * * This will release the metadata reservation for an inode. This can be called * once we complete IO for a given set of bytes to release their metadata * reservations, or on error for the same reason. */ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, bool qgroup_free) { struct btrfs_fs_info *fs_info = inode->root->fs_info; num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); if (!(inode->flags & BTRFS_INODE_NODATASUM)) inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); if (btrfs_is_testing(fs_info)) return; btrfs_inode_rsv_release(inode, qgroup_free); } /* * Release our outstanding_extents for an inode. * * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we * have outstanding_extents to track the real usage, so we use this to free our * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. */ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned num_extents; spin_lock(&inode->lock); num_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, -num_extents); btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); if (btrfs_is_testing(fs_info)) return; btrfs_inode_rsv_release(inode, true); } /* * Reserve data and metadata space for delalloc * * @inode: inode we're writing to * @start: start range we are writing to * @len: how long the range we are writing to * @reserved: mandatory parameter, record actually reserved qgroup ranges of * current reservation. * * This will do the following things * * - reserve space in data space info for num bytes and reserve precious * corresponding qgroup space * (Done in check_data_free_space) * * - reserve space for metadata space, based on the number of outstanding * extents and how much csums will be needed also reserve metadata space in a * per root over-reserve method. * - add to the inodes->delalloc_bytes * - add it to the fs_info's delalloc inodes list. * (Above 3 all done in delalloc_reserve_metadata) * * Return 0 for success * Return <0 for error(-ENOSPC or -EDQUOT) */ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len) { int ret; ret = btrfs_check_data_free_space(inode, reserved, start, len, false); if (ret < 0) return ret; ret = btrfs_delalloc_reserve_metadata(inode, len, len, false); if (ret < 0) { btrfs_free_reserved_data_space(inode, *reserved, start, len); extent_changeset_free(*reserved); *reserved = NULL; } return ret; } /* * Release data and metadata space for delalloc * * @inode: inode we're releasing space for * @reserved: list of changed/reserved ranges * @start: start position of the space already reserved * @len: length of the space already reserved * @qgroup_free: should qgroup reserved-space also be freed * * Release the metadata space that was not used and will decrement * ->delalloc_bytes and remove it from the fs_info->delalloc_inodes list if * there are no delalloc bytes left. Also it will handle the qgroup reserved * space. */ void btrfs_delalloc_release_space(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, bool qgroup_free) { btrfs_delalloc_release_metadata(inode, len, qgroup_free); btrfs_free_reserved_data_space(inode, reserved, start, len); } |
13 13 13 16 2 15 14 2 3 3 3 13 6 7 13 13 13 1 13 13 7 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 | // SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC recvmsg() implementation * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/net.h> #include <linux/skbuff.h> #include <linux/export.h> #include <linux/sched/signal.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" /* * Post a call for attention by the socket or kernel service. Further * notifications are suppressed by putting recvmsg_link on a dummy queue. */ void rxrpc_notify_socket(struct rxrpc_call *call) { struct rxrpc_sock *rx; struct sock *sk; _enter("%d", call->debug_id); if (!list_empty(&call->recvmsg_link)) return; rcu_read_lock(); rx = rcu_dereference(call->socket); sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { spin_lock(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); spin_unlock(&call->notify_lock); } else { spin_lock(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { rxrpc_get_call(call, rxrpc_call_get_notify_socket); list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); } spin_unlock(&rx->recvmsg_lock); if (!sock_flag(sk, SOCK_DEAD)) { _debug("call %ps", sk->sk_data_ready); sk->sk_data_ready(sk); } } } rcu_read_unlock(); _leave(""); } /* * Pass a call terminating message to userspace. */ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) { u32 tmp = 0; int ret; switch (call->completion) { case RXRPC_CALL_SUCCEEDED: ret = 0; if (rxrpc_is_service_call(call)) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); break; case RXRPC_CALL_REMOTELY_ABORTED: tmp = call->abort_code; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); break; case RXRPC_CALL_LOCALLY_ABORTED: tmp = call->abort_code; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); break; case RXRPC_CALL_NETWORK_ERROR: tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); break; case RXRPC_CALL_LOCAL_ERROR: tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: pr_err("Invalid terminal call state %u\n", call->completion); BUG(); break; } trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal, call->ackr_window - 1, call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } /* * Discard a packet we've used up and advance the Rx window by one. */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_serial_t serial; rxrpc_seq_t old_consumed = call->rx_consumed, tseq; bool last; int acked; _enter("%d", call->debug_id); skb = skb_dequeue(&call->recvmsg_queue); rxrpc_see_skb(skb, rxrpc_skb_see_rotate); sp = rxrpc_skb(skb); tseq = sp->hdr.seq; serial = sp->hdr.serial; last = sp->hdr.flags & RXRPC_LAST_PACKET; /* Barrier against rxrpc_input_data(). */ if (after(tseq, call->rx_consumed)) smp_store_release(&call->rx_consumed, tseq); rxrpc_free_skb(skb, rxrpc_skb_put_rotate); trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, serial, call->rx_consumed); if (last) set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags); /* Check to see if there's an ACK that needs sending. */ acked = atomic_add_return(call->rx_consumed - old_consumed, &call->ackr_nr_consumed); if (acked > 8 && !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_poke_call(call, rxrpc_call_poke_idle); } /* * Decrypt and verify a DATA packet. */ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); if (sp->flags & RXRPC_RX_VERIFIED) return 0; return call->security->verify_packet(call, skb); } /* * Deliver messages to a call. This keeps processing packets until the buffer * is filled and we find either more DATA (returns 0) or the end of the DATA * (returns 1). If more packets are required, it returns -EAGAIN and if the * call has failed it returns -EIO. */ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, struct iov_iter *iter, size_t len, int flags, size_t *_offset) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t seq = 0; size_t remain; unsigned int rx_pkt_offset, rx_pkt_len; int copy, ret = -EAGAIN, ret2; rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; if (rxrpc_call_has_failed(call)) { seq = call->ackr_window - 1; ret = -EIO; goto done; } if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { seq = call->ackr_window - 1; ret = 1; goto done; } /* No one else can be removing stuff from the queue, so we shouldn't * need the Rx lock to walk it. */ skb = skb_peek(&call->recvmsg_queue); while (skb) { rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg); sp = rxrpc_skb(skb); seq = sp->hdr.seq; if (!(flags & MSG_PEEK)) trace_rxrpc_receive(call, rxrpc_receive_front, sp->hdr.serial, seq); if (msg) sock_recv_timestamp(msg, sock->sk, skb); if (rx_pkt_offset == 0) { ret2 = rxrpc_verify_data(call, skb); trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, sp->offset, sp->len, ret2); if (ret2 < 0) { kdebug("verify = %d", ret2); ret = ret2; goto out; } rx_pkt_offset = sp->offset; rx_pkt_len = sp->len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); } /* We have to handle short, empty and used-up DATA packets. */ remain = len - *_offset; copy = rx_pkt_len; if (copy > remain) copy = remain; if (copy > 0) { ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, copy); if (ret2 < 0) { ret = ret2; goto out; } /* handle piecemeal consumption of data packets */ rx_pkt_offset += copy; rx_pkt_len -= copy; *_offset += copy; } if (rx_pkt_len > 0) { trace_rxrpc_recvdata(call, rxrpc_recvmsg_full, seq, rx_pkt_offset, rx_pkt_len, 0); ASSERTCMP(*_offset, ==, len); ret = 0; break; } /* The whole packet has been transferred. */ if (sp->hdr.flags & RXRPC_LAST_PACKET) ret = 1; rx_pkt_offset = 0; rx_pkt_len = 0; skb = skb_peek_next(skb, &call->recvmsg_queue); if (!(flags & MSG_PEEK)) rxrpc_rotate_rx_window(call); } out: if (!(flags & MSG_PEEK)) { call->rx_pkt_offset = rx_pkt_offset; call->rx_pkt_len = rx_pkt_len; } done: trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); if (ret == -EAGAIN) set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); return ret; } /* * Receive a message from an RxRPC socket * - we need to be careful about two or more threads calling recvmsg * simultaneously */ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct list_head *l; unsigned int call_debug_id = 0; size_t copied = 0; long timeo; int ret; DEFINE_WAIT(wait); trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); try_again: lock_sock(&rx->sk); /* Return immediately if a client socket has no outstanding calls */ if (RB_EMPTY_ROOT(&rx->calls) && list_empty(&rx->recvmsg_q) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); return -EAGAIN; } if (list_empty(&rx->recvmsg_q)) { ret = -EWOULDBLOCK; if (timeo == 0) { call = NULL; goto error_no_call; } release_sock(&rx->sk); /* Wait for something to happen */ prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, TASK_INTERRUPTIBLE); ret = sock_error(&rx->sk); if (ret) goto wait_error; if (list_empty(&rx->recvmsg_q)) { if (signal_pending(current)) goto wait_interrupted; trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0); timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(&rx->sk), &wait); goto try_again; } /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. * We also want to weed out calls that got requeued whilst we were * shovelling data out. */ spin_lock(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!rxrpc_call_is_complete(call) && skb_queue_empty(&call->recvmsg_queue)) { list_del_init(&call->recvmsg_link); spin_unlock(&rx->recvmsg_lock); release_sock(&rx->sk); trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); rxrpc_put_call(call, rxrpc_call_put_recvmsg); goto try_again; } if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else rxrpc_get_call(call, rxrpc_call_get_recvmsg); spin_unlock(&rx->recvmsg_lock); call_debug_id = call->debug_id; trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0); /* We're going to drop the socket lock, so we need to lock the call * against interference by sendmsg. */ if (!mutex_trylock(&call->user_mutex)) { ret = -EWOULDBLOCK; if (flags & MSG_DONTWAIT) goto error_requeue_call; ret = -ERESTARTSYS; if (mutex_lock_interruptible(&call->user_mutex) < 0) goto error_requeue_call; } release_sock(&rx->sk); if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { if (flags & MSG_CMSG_COMPAT) { unsigned int id32 = call->user_call_ID; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, sizeof(unsigned int), &id32); } else { unsigned long idl = call->user_call_ID; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, sizeof(unsigned long), &idl); } if (ret < 0) goto error_unlock_call; } if (msg->msg_name && call->peer) { size_t len = sizeof(call->dest_srx); memcpy(msg->msg_name, &call->dest_srx, len); msg->msg_namelen = len; } ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, flags, &copied); if (ret == -EAGAIN) ret = 0; if (ret == -EIO) goto call_failed; if (ret < 0) goto error_unlock_call; if (rxrpc_call_is_complete(call) && skb_queue_empty(&call->recvmsg_queue)) goto call_complete; if (rxrpc_call_has_failed(call)) goto call_failed; if (!skb_queue_empty(&call->recvmsg_queue)) rxrpc_notify_socket(call); goto not_yet_complete; call_failed: rxrpc_purge_queue(&call->recvmsg_queue); call_complete: ret = rxrpc_recvmsg_term(call, msg); if (ret < 0) goto error_unlock_call; if (!(flags & MSG_PEEK)) rxrpc_release_call(rx, call); msg->msg_flags |= MSG_EOR; ret = 1; not_yet_complete: if (ret == 0) msg->msg_flags |= MSG_MORE; else msg->msg_flags &= ~MSG_MORE; ret = copied; error_unlock_call: mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_recvmsg); trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret); return ret; error_requeue_call: if (!(flags & MSG_PEEK)) { spin_lock(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); spin_unlock(&rx->recvmsg_lock); trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0); } else { rxrpc_put_call(call, rxrpc_call_put_recvmsg); } error_no_call: release_sock(&rx->sk); error_trace: trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret); return ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; goto error_trace; } /** * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info * @sock: The socket that the call exists on * @call: The call to send data through * @iter: The buffer to receive into * @_len: The amount of data we want to receive (decreased on return) * @want_more: True if more data is expected to be read * @_abort: Where the abort code is stored if -ECONNABORTED is returned * @_service: Where to store the actual service ID (may be upgraded) * * Allow a kernel service to receive data and pick up information about the * state of a call. Returns 0 if got what was asked for and there's more * available, 1 if we got what was asked for and we're at the end of the data * and -EAGAIN if we need more data. * * Note that we may return -EAGAIN to drain empty packets at the end of the * data, even if we've already copied over the requested data. * * *_abort should also be initialised to 0. */ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, struct iov_iter *iter, size_t *_len, bool want_more, u32 *_abort, u16 *_service) { size_t offset = 0; int ret; _enter("{%d},%zu,%d", call->debug_id, *_len, want_more); mutex_lock(&call->user_mutex); ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset); *_len -= offset; if (ret == -EIO) goto call_failed; if (ret < 0) goto out; /* We can only reach here with a partially full buffer if we have * reached the end of the data. We must otherwise have a full buffer * or have been given -EAGAIN. */ if (ret == 1) { if (iov_iter_count(iter) > 0) goto short_data; if (!want_more) goto read_phase_complete; ret = 0; goto out; } if (!want_more) goto excess_data; goto out; read_phase_complete: ret = 1; out: if (_service) *_service = call->dest_srx.srx_service; mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort); return ret; short_data: trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data, call->cid, call->call_id, call->rx_consumed, 0, -EBADMSG); ret = -EBADMSG; goto out; excess_data: trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data, call->cid, call->call_id, call->rx_consumed, 0, -EMSGSIZE); ret = -EMSGSIZE; goto out; call_failed: *_abort = call->abort_code; ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (iov_iter_count(iter) > 0) ret = -ECONNRESET; } goto out; } EXPORT_SYMBOL(rxrpc_kernel_recv_data); |
5 5 2 2 2 2 4 4 4 1 1 1 1 1 4 4 4 4 4 16 4 11 6 8 2 2 2 1 4 2 1 1 3 1 4 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 6 1 2 1 2 1 1 3 1 2 5 1 2 1 1 2 3 1 1 1 2 16 1 7 14 3 1 2 9 1 1 4 2 1 5 3 1 1 1 1 1 1 1 1 1 1 4 2 2 2 2 1 2 2 1 2 2 1 1 1 2 17 1 1 6 1 3 2 2 1 1 1 5 1 1 3 4 4 4 1 1 3 1 3 1 1 1 5 4 5 6 1 5 2 2 1 1 1 8 1 6 1 6 4 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 | // SPDX-License-Identifier: GPL-2.0 /* * cfg80211 - wext compat code * * This is temporary code until all wireless functionality is migrated * into cfg80211, when that happens all the exports here go away and * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2019-2023 Intel Corporation */ #include <linux/export.h> #include <linux/wireless.h> #include <linux/nl80211.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/slab.h> #include <net/iw_handler.h> #include <net/cfg80211.h> #include <net/cfg80211-wext.h> #include "wext-compat.h" #include "core.h" #include "rdev-ops.h" int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { strcpy(wrqu->name, "IEEE 802.11"); return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwname); int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { __u32 *mode = &wrqu->mode; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; int ret; rdev = wiphy_to_rdev(wdev->wiphy); switch (*mode) { case IW_MODE_INFRA: type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_MONITOR: type = NL80211_IFTYPE_MONITOR; break; default: return -EINVAL; } if (type == wdev->iftype) return 0; memset(&vifparams, 0, sizeof(vifparams)); wiphy_lock(wdev->wiphy); ret = cfg80211_change_iface(rdev, dev, type, &vifparams); wiphy_unlock(wdev->wiphy); return ret; } EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode); int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { __u32 *mode = &wrqu->mode; struct wireless_dev *wdev = dev->ieee80211_ptr; if (!wdev) return -EOPNOTSUPP; switch (wdev->iftype) { case NL80211_IFTYPE_AP: *mode = IW_MODE_MASTER; break; case NL80211_IFTYPE_STATION: *mode = IW_MODE_INFRA; break; case NL80211_IFTYPE_ADHOC: *mode = IW_MODE_ADHOC; break; case NL80211_IFTYPE_MONITOR: *mode = IW_MODE_MONITOR; break; case NL80211_IFTYPE_WDS: *mode = IW_MODE_REPEAT; break; case NL80211_IFTYPE_AP_VLAN: *mode = IW_MODE_SECOND; /* FIXME */ break; default: *mode = IW_MODE_AUTO; break; } return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwmode); int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_range *range = (struct iw_range *) extra; enum nl80211_band band; int i, c = 0; if (!wdev) return -EOPNOTSUPP; data->length = sizeof(struct iw_range); memset(range, 0, sizeof(struct iw_range)); range->we_version_compiled = WIRELESS_EXT; range->we_version_source = 21; range->retry_capa = IW_RETRY_LIMIT; range->retry_flags = IW_RETRY_LIMIT; range->min_retry = 0; range->max_retry = 255; range->min_rts = 0; range->max_rts = 2347; range->min_frag = 256; range->max_frag = 2346; range->max_encoding_tokens = 4; range->max_qual.updated = IW_QUAL_NOISE_INVALID; switch (wdev->wiphy->signal_type) { case CFG80211_SIGNAL_TYPE_NONE: break; case CFG80211_SIGNAL_TYPE_MBM: range->max_qual.level = (u8)-110; range->max_qual.qual = 70; range->avg_qual.qual = 35; range->max_qual.updated |= IW_QUAL_DBM; range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; break; case CFG80211_SIGNAL_TYPE_UNSPEC: range->max_qual.level = 100; range->max_qual.qual = 100; range->avg_qual.qual = 50; range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; break; } range->avg_qual.level = range->max_qual.level / 2; range->avg_qual.noise = range->max_qual.noise / 2; range->avg_qual.updated = range->max_qual.updated; for (i = 0; i < wdev->wiphy->n_cipher_suites; i++) { switch (wdev->wiphy->cipher_suites[i]) { case WLAN_CIPHER_SUITE_TKIP: range->enc_capa |= (IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_WPA); break; case WLAN_CIPHER_SUITE_CCMP: range->enc_capa |= (IW_ENC_CAPA_CIPHER_CCMP | IW_ENC_CAPA_WPA2); break; case WLAN_CIPHER_SUITE_WEP40: range->encoding_size[range->num_encoding_sizes++] = WLAN_KEY_LEN_WEP40; break; case WLAN_CIPHER_SUITE_WEP104: range->encoding_size[range->num_encoding_sizes++] = WLAN_KEY_LEN_WEP104; break; } } for (band = 0; band < NUM_NL80211_BANDS; band ++) { struct ieee80211_supported_band *sband; sband = wdev->wiphy->bands[band]; if (!sband) continue; for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { struct ieee80211_channel *chan = &sband->channels[i]; if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { range->freq[c].i = ieee80211_frequency_to_channel( chan->center_freq); range->freq[c].m = chan->center_freq; range->freq[c].e = 6; c++; } } } range->num_channels = c; range->num_frequency = c; IW_EVENT_CAPA_SET_KERNEL(range->event_capa); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); if (wdev->wiphy->max_scan_ssids > 0) range->scan_capa |= IW_SCAN_CAPA_ESSID; return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); /** * cfg80211_wext_freq - get wext frequency for non-"auto" * @freq: the wext freq encoding * * Returns: a frequency, or a negative error code, or 0 for auto. */ int cfg80211_wext_freq(struct iw_freq *freq) { /* * Parse frequency - return 0 for auto and * -EINVAL for impossible things. */ if (freq->e == 0) { enum nl80211_band band = NL80211_BAND_2GHZ; if (freq->m < 0) return 0; if (freq->m > 14) band = NL80211_BAND_5GHZ; return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div <= 0) return -EINVAL; return freq->m / div; } } int cfg80211_wext_siwrts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rts = &wrqu->rts; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 orts = wdev->wiphy->rts_threshold; int err; wiphy_lock(&rdev->wiphy); if (rts->disabled || !rts->fixed) { wdev->wiphy->rts_threshold = (u32) -1; } else if (rts->value < 0) { err = -EINVAL; goto out; } else { wdev->wiphy->rts_threshold = rts->value; } err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD); if (err) wdev->wiphy->rts_threshold = orts; out: wiphy_unlock(&rdev->wiphy); return err; } EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts); int cfg80211_wext_giwrts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rts = &wrqu->rts; struct wireless_dev *wdev = dev->ieee80211_ptr; rts->value = wdev->wiphy->rts_threshold; rts->disabled = rts->value == (u32) -1; rts->fixed = 1; return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwrts); int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *frag = &wrqu->frag; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 ofrag = wdev->wiphy->frag_threshold; int err; wiphy_lock(&rdev->wiphy); if (frag->disabled || !frag->fixed) { wdev->wiphy->frag_threshold = (u32) -1; } else if (frag->value < 256) { err = -EINVAL; goto out; } else { /* Fragment length must be even, so strip LSB. */ wdev->wiphy->frag_threshold = frag->value & ~0x1; } err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; out: wiphy_unlock(&rdev->wiphy); return err; } EXPORT_WEXT_HANDLER(cfg80211_wext_siwfrag); int cfg80211_wext_giwfrag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *frag = &wrqu->frag; struct wireless_dev *wdev = dev->ieee80211_ptr; frag->value = wdev->wiphy->frag_threshold; frag->disabled = frag->value == (u32) -1; frag->fixed = 1; return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwfrag); static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *retry = &wrqu->retry; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 changed = 0; u8 olong = wdev->wiphy->retry_long; u8 oshort = wdev->wiphy->retry_short; int err; if (retry->disabled || retry->value < 1 || retry->value > 255 || (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; wiphy_lock(&rdev->wiphy); if (retry->flags & IW_RETRY_LONG) { wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; } else if (retry->flags & IW_RETRY_SHORT) { wdev->wiphy->retry_short = retry->value; changed |= WIPHY_PARAM_RETRY_SHORT; } else { wdev->wiphy->retry_short = retry->value; wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; changed |= WIPHY_PARAM_RETRY_SHORT; } err = rdev_set_wiphy_params(rdev, changed); if (err) { wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; } wiphy_unlock(&rdev->wiphy); return err; } int cfg80211_wext_giwretry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *retry = &wrqu->retry; struct wireless_dev *wdev = dev->ieee80211_ptr; retry->disabled = 0; if (retry->flags == 0 || (retry->flags & IW_RETRY_SHORT)) { /* * First return short value, iwconfig will ask long value * later if needed */ retry->flags |= IW_RETRY_LIMIT | IW_RETRY_SHORT; retry->value = wdev->wiphy->retry_short; if (wdev->wiphy->retry_long == wdev->wiphy->retry_short) retry->flags |= IW_RETRY_LONG; return 0; } if (retry->flags & IW_RETRY_LONG) { retry->flags = IW_RETRY_LIMIT | IW_RETRY_LONG; retry->value = wdev->wiphy->retry_long; } return 0; } EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry); static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, struct net_device *dev, bool pairwise, const u8 *addr, bool remove, bool tx_key, int idx, struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; bool rejoin = false; if (wdev->valid_links) return -EINVAL; if (pairwise && !addr) return -EINVAL; /* * In many cases we won't actually need this, but it's better * to do it first in case the allocation fails. Don't use wext. */ if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; for (i = 0; i < 4; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } if (wdev->iftype != NL80211_IFTYPE_ADHOC && wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { if (!wdev->connected) return -ENOLINK; if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; if (idx < 4 || idx > 5) return -EINVAL; } else if (idx < 0 || idx > 3) return -EINVAL; if (remove) { err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) { /* * If removing the current TX key, we will need to * join a new IBSS without the privacy bit clear. */ if (idx == wdev->wext.default_key && wdev->iftype == NL80211_IFTYPE_ADHOC) { cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } if (!pairwise && addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; else err = rdev_del_key(rdev, dev, -1, idx, pairwise, addr); } wdev->wext.connect.privacy = false; /* * Applications using wireless extensions expect to be * able to delete keys that don't exist, so allow that. */ if (err == -ENOENT) err = 0; if (!err) { if (!addr && idx < 4) { memset(wdev->wext.keys->data[idx], 0, sizeof(wdev->wext.keys->data[idx])); wdev->wext.keys->params[idx].key_len = 0; wdev->wext.keys->params[idx].cipher = 0; } if (idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } if (!err && rejoin) err = cfg80211_ibss_wext_join(rdev, wdev); return err; } if (addr) tx_key = false; if (cfg80211_validate_key_settings(rdev, params, idx, pairwise, addr)) return -EINVAL; err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; if (err) return err; /* * We only need to store WEP keys, since they're the only keys that * can be set before a connection is established and persist after * disconnecting. */ if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104)) { wdev->wext.keys->params[idx] = *params; memcpy(wdev->wext.keys->data[idx], params->key, params->key_len); wdev->wext.keys->params[idx].key = wdev->wext.keys->data[idx]; } if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104) && (tx_key || (!addr && wdev->wext.default_key == -1))) { if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) { /* * If we are getting a new TX key from not having * had one before we need to join a new IBSS with * the privacy bit set. */ if (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->wext.default_key == -1) { cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } err = rdev_set_default_key(rdev, dev, -1, idx, true, true); } if (!err) { wdev->wext.default_key = idx; if (rejoin) err = cfg80211_ibss_wext_join(rdev, wdev); } return err; } if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_set_default_mgmt_key(rdev, dev, -1, idx); if (!err) wdev->wext.default_mgmt_key = idx; return err; } return 0; } static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int idx, err; bool remove = false; struct key_params params; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || !rdev->ops->set_default_key) return -EOPNOTSUPP; wiphy_lock(&rdev->wiphy); if (wdev->valid_links) { err = -EOPNOTSUPP; goto out; } idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) { err = -EINVAL; goto out; } else { idx--; } if (erq->flags & IW_ENCODE_DISABLED) remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ err = 0; if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) err = rdev_set_default_key(rdev, dev, -1, idx, true, true); if (!err) wdev->wext.default_key = idx; goto out; } memset(¶ms, 0, sizeof(params)); params.key = keybuf; params.key_len = erq->length; if (erq->length == 5) { params.cipher = WLAN_CIPHER_SUITE_WEP40; } else if (erq->length == 13) { params.cipher = WLAN_CIPHER_SUITE_WEP104; } else if (!remove) { err = -EINVAL; goto out; } err = cfg80211_set_encryption(rdev, dev, false, NULL, remove, wdev->wext.default_key == -1, idx, ¶ms); out: wiphy_unlock(&rdev->wiphy); return err; } static int cfg80211_wext_siwencodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; const u8 *addr; int idx; bool remove = false; struct key_params params; u32 cipher; int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || !rdev->ops->set_default_key) return -EOPNOTSUPP; if (wdev->valid_links) return -EOPNOTSUPP; switch (ext->alg) { case IW_ENCODE_ALG_NONE: remove = true; cipher = 0; break; case IW_ENCODE_ALG_WEP: if (ext->key_len == 5) cipher = WLAN_CIPHER_SUITE_WEP40; else if (ext->key_len == 13) cipher = WLAN_CIPHER_SUITE_WEP104; else return -EINVAL; break; case IW_ENCODE_ALG_TKIP: cipher = WLAN_CIPHER_SUITE_TKIP; break; case IW_ENCODE_ALG_CCMP: cipher = WLAN_CIPHER_SUITE_CCMP; break; case IW_ENCODE_ALG_AES_CMAC: cipher = WLAN_CIPHER_SUITE_AES_CMAC; break; default: return -EOPNOTSUPP; } if (erq->flags & IW_ENCODE_DISABLED) remove = true; idx = erq->flags & IW_ENCODE_INDEX; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { if (idx < 4 || idx > 5) { idx = wdev->wext.default_mgmt_key; if (idx < 0) return -EINVAL; } else idx--; } else { if (idx < 1 || idx > 4) { idx = wdev->wext.default_key; if (idx < 0) return -EINVAL; } else idx--; } addr = ext->addr.sa_data; if (is_broadcast_ether_addr(addr)) addr = NULL; memset(¶ms, 0, sizeof(params)); params.key = ext->key; params.key_len = ext->key_len; params.cipher = cipher; if (ext->ext_flags & IW_ENCODE_EXT_RX_SEQ_VALID) { params.seq = ext->rx_seq; params.seq_len = 6; } wiphy_lock(wdev->wiphy); ret = cfg80211_set_encryption( rdev, dev, !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), addr, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, idx, ¶ms); wiphy_unlock(wdev->wiphy); return ret; } static int cfg80211_wext_giwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; int idx; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) return -EINVAL; else idx--; erq->flags = idx + 1; if (!wdev->wext.keys || !wdev->wext.keys->params[idx].cipher) { erq->flags |= IW_ENCODE_DISABLED; erq->length = 0; return 0; } erq->length = min_t(size_t, erq->length, wdev->wext.keys->params[idx].key_len); memcpy(keybuf, wdev->wext.keys->params[idx].key, erq->length); erq->flags |= IW_ENCODE_ENABLED; return 0; } static int cfg80211_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_freq *wextfreq = &wrqu->freq; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; int freq, ret; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); break; case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); break; case NL80211_IFTYPE_MONITOR: freq = cfg80211_wext_freq(wextfreq); if (freq < 0) { ret = freq; break; } if (freq == 0) { ret = -EINVAL; break; } chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) { ret = -EINVAL; break; } ret = cfg80211_set_monitor_channel(rdev, &chandef); break; case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); if (freq < 0) { ret = freq; break; } if (freq == 0) { ret = -EINVAL; break; } chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) { ret = -EINVAL; break; } ret = cfg80211_set_mesh_channel(rdev, wdev, &chandef); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_freq *freq = &wrqu->freq; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = {}; int ret; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); break; case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); break; case NL80211_IFTYPE_MONITOR: if (!rdev->ops->get_channel) { ret = -EINVAL; break; } ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) break; freq->m = chandef.chan->center_freq; freq->e = 6; ret = 0; break; default: ret = -EINVAL; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_siwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; int ret; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; if (data->txpower.flags & IW_TXPOW_RANGE) return -EINVAL; if (!rdev->ops->set_tx_power) return -EOPNOTSUPP; /* only change when not disabling */ if (!data->txpower.disabled) { rfkill_set_sw_state(rdev->wiphy.rfkill, false); if (data->txpower.fixed) { /* * wext doesn't support negative values, see * below where it's for automatic */ if (data->txpower.value < 0) return -EINVAL; dbm = data->txpower.value; type = NL80211_TX_POWER_FIXED; /* TODO: do regulatory check! */ } else { /* * Automatic power level setting, max being the value * passed in from userland. */ if (data->txpower.value < 0) { type = NL80211_TX_POWER_AUTOMATIC; } else { dbm = data->txpower.value; type = NL80211_TX_POWER_LIMITED; } } } else { if (rfkill_set_sw_state(rdev->wiphy.rfkill, true)) schedule_work(&rdev->rfkill_block); return 0; } wiphy_lock(&rdev->wiphy); ret = rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_giwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err, val; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; if (data->txpower.flags & IW_TXPOW_RANGE) return -EINVAL; if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; wiphy_lock(&rdev->wiphy); err = rdev_get_tx_power(rdev, wdev, &val); wiphy_unlock(&rdev->wiphy); if (err) return err; /* well... oh well */ data->txpower.fixed = 1; data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill); data->txpower.value = val; data->txpower.flags = IW_TXPOW_DBM; return 0; } static int cfg80211_set_auth_alg(struct wireless_dev *wdev, s32 auth_alg) { int nr_alg = 0; if (!auth_alg) return -EINVAL; if (auth_alg & ~(IW_AUTH_ALG_OPEN_SYSTEM | IW_AUTH_ALG_SHARED_KEY | IW_AUTH_ALG_LEAP)) return -EINVAL; if (auth_alg & IW_AUTH_ALG_OPEN_SYSTEM) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; } if (auth_alg & IW_AUTH_ALG_SHARED_KEY) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_SHARED_KEY; } if (auth_alg & IW_AUTH_ALG_LEAP) { nr_alg++; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_NETWORK_EAP; } if (nr_alg > 1) wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; return 0; } static int cfg80211_set_wpa_version(struct wireless_dev *wdev, u32 wpa_versions) { if (wpa_versions & ~(IW_AUTH_WPA_VERSION_WPA | IW_AUTH_WPA_VERSION_WPA2| IW_AUTH_WPA_VERSION_DISABLED)) return -EINVAL; if ((wpa_versions & IW_AUTH_WPA_VERSION_DISABLED) && (wpa_versions & (IW_AUTH_WPA_VERSION_WPA| IW_AUTH_WPA_VERSION_WPA2))) return -EINVAL; if (wpa_versions & IW_AUTH_WPA_VERSION_DISABLED) wdev->wext.connect.crypto.wpa_versions &= ~(NL80211_WPA_VERSION_1|NL80211_WPA_VERSION_2); if (wpa_versions & IW_AUTH_WPA_VERSION_WPA) wdev->wext.connect.crypto.wpa_versions |= NL80211_WPA_VERSION_1; if (wpa_versions & IW_AUTH_WPA_VERSION_WPA2) wdev->wext.connect.crypto.wpa_versions |= NL80211_WPA_VERSION_2; return 0; } static int cfg80211_set_cipher_group(struct wireless_dev *wdev, u32 cipher) { if (cipher & IW_AUTH_CIPHER_WEP40) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_WEP40; else if (cipher & IW_AUTH_CIPHER_WEP104) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_WEP104; else if (cipher & IW_AUTH_CIPHER_TKIP) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_TKIP; else if (cipher & IW_AUTH_CIPHER_CCMP) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_CCMP; else if (cipher & IW_AUTH_CIPHER_AES_CMAC) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_AES_CMAC; else if (cipher & IW_AUTH_CIPHER_NONE) wdev->wext.connect.crypto.cipher_group = 0; else return -EINVAL; return 0; } static int cfg80211_set_cipher_pairwise(struct wireless_dev *wdev, u32 cipher) { int nr_ciphers = 0; u32 *ciphers_pairwise = wdev->wext.connect.crypto.ciphers_pairwise; if (cipher & IW_AUTH_CIPHER_WEP40) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP40; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_WEP104) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP104; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_TKIP) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_TKIP; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_CCMP) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_CCMP; nr_ciphers++; } if (cipher & IW_AUTH_CIPHER_AES_CMAC) { ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_AES_CMAC; nr_ciphers++; } BUILD_BUG_ON(NL80211_MAX_NR_CIPHER_SUITES < 5); wdev->wext.connect.crypto.n_ciphers_pairwise = nr_ciphers; return 0; } static int cfg80211_set_key_mgt(struct wireless_dev *wdev, u32 key_mgt) { int nr_akm_suites = 0; if (key_mgt & ~(IW_AUTH_KEY_MGMT_802_1X | IW_AUTH_KEY_MGMT_PSK)) return -EINVAL; if (key_mgt & IW_AUTH_KEY_MGMT_802_1X) { wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = WLAN_AKM_SUITE_8021X; nr_akm_suites++; } if (key_mgt & IW_AUTH_KEY_MGMT_PSK) { wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = WLAN_AKM_SUITE_PSK; nr_akm_suites++; } wdev->wext.connect.crypto.n_akm_suites = nr_akm_suites; return 0; } static int cfg80211_wext_siwauth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *data = &wrqu->param; struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_PRIVACY_INVOKED: wdev->wext.connect.privacy = data->value; return 0; case IW_AUTH_WPA_VERSION: return cfg80211_set_wpa_version(wdev, data->value); case IW_AUTH_CIPHER_GROUP: return cfg80211_set_cipher_group(wdev, data->value); case IW_AUTH_KEY_MGMT: return cfg80211_set_key_mgt(wdev, data->value); case IW_AUTH_CIPHER_PAIRWISE: return cfg80211_set_cipher_pairwise(wdev, data->value); case IW_AUTH_80211_AUTH_ALG: return cfg80211_set_auth_alg(wdev, data->value); case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: case IW_AUTH_DROP_UNENCRYPTED: case IW_AUTH_MFP: return 0; default: return -EOPNOTSUPP; } } static int cfg80211_wext_giwauth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { /* XXX: what do we need? */ return -EOPNOTSUPP; } static int cfg80211_wext_siwpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *wrq = &wrqu->power; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); bool ps; int timeout = wdev->ps_timeout; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; if (!rdev->ops->set_power_mgmt) return -EOPNOTSUPP; if (wrq->disabled) { ps = false; } else { switch (wrq->flags & IW_POWER_MODE) { case IW_POWER_ON: /* If not specified */ case IW_POWER_MODE: /* If set all mask */ case IW_POWER_ALL_R: /* If explicitely state all */ ps = true; break; default: /* Otherwise we ignore */ return -EINVAL; } if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT)) return -EINVAL; if (wrq->flags & IW_POWER_TIMEOUT) timeout = wrq->value / 1000; } wiphy_lock(&rdev->wiphy); err = rdev_set_power_mgmt(rdev, dev, ps, timeout); wiphy_unlock(&rdev->wiphy); if (err) return err; wdev->ps = ps; wdev->ps_timeout = timeout; return 0; } static int cfg80211_wext_giwpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *wrq = &wrqu->power; struct wireless_dev *wdev = dev->ieee80211_ptr; wrq->disabled = !wdev->ps; return 0; } static int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rate = &wrqu->bitrate; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; int band, ridx, ret; bool match = false; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; memset(&mask, 0, sizeof(mask)); fixed = 0; maxrate = (u32)-1; if (rate->value < 0) { /* nothing */ } else if (rate->fixed) { fixed = rate->value / 100000; } else { maxrate = rate->value / 100000; } for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wdev->wiphy->bands[band]; if (sband == NULL) continue; for (ridx = 0; ridx < sband->n_bitrates; ridx++) { struct ieee80211_rate *srate = &sband->bitrates[ridx]; if (fixed == srate->bitrate) { mask.control[band].legacy = 1 << ridx; match = true; break; } if (srate->bitrate <= maxrate) { mask.control[band].legacy |= 1 << ridx; match = true; } } } if (!match) return -EINVAL; wiphy_lock(&rdev->wiphy); if (dev->ieee80211_ptr->valid_links) ret = -EOPNOTSUPP; else ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_giwrate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_param *rate = &wrqu->bitrate; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct station_info sinfo = {}; u8 addr[ETH_ALEN]; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!rdev->ops->get_station) return -EOPNOTSUPP; err = 0; if (!wdev->valid_links && wdev->links[0].client.current_bss) memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else err = -EOPNOTSUPP; if (err) return err; wiphy_lock(&rdev->wiphy); err = rdev_get_station(rdev, dev, addr, &sinfo); wiphy_unlock(&rdev->wiphy); if (err) return err; if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { err = -EOPNOTSUPP; goto free; } rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); free: cfg80211_sinfo_release_content(&sinfo); return err; } /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; int ret; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) return NULL; if (!rdev->ops->get_station) return NULL; /* Grab BSSID of current BSS, if any */ wiphy_lock(&rdev->wiphy); if (wdev->valid_links || !wdev->links[0].client.current_bss) { wiphy_unlock(&rdev->wiphy); return NULL; } memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); memset(&sinfo, 0, sizeof(sinfo)); ret = rdev_get_station(rdev, dev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); if (ret) return NULL; memset(&wstats, 0, sizeof(wstats)); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.updated |= IW_QUAL_DBM; wstats.qual.level = sig; if (sig < -110) sig = -110; else if (sig > -40) sig = -40; wstats.qual.qual = sig + 110; break; } fallthrough; case CFG80211_SIGNAL_TYPE_UNSPEC: if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; wstats.qual.qual = sinfo.signal; break; } fallthrough; default: wstats.qual.updated |= IW_QUAL_LEVEL_INVALID; wstats.qual.updated |= IW_QUAL_QUAL_INVALID; } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; cfg80211_sinfo_release_content(&sinfo); return &wstats; } static int cfg80211_wext_siwap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int ret; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); break; case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_giwap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int ret; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); break; case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_siwessid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *ssid) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int ret; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_siwessid(dev, info, data, ssid); break; case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_siwessid(dev, info, data, ssid); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_giwessid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *ssid) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int ret; data->flags = 0; data->length = 0; wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: ret = cfg80211_ibss_wext_giwessid(dev, info, data, ssid); break; case NL80211_IFTYPE_STATION: ret = cfg80211_mgd_wext_giwessid(dev, info, data, ssid); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static int cfg80211_wext_siwpmksa(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; int ret; memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; cfg_pmksa.bssid = pmksa->bssid.sa_data; cfg_pmksa.pmkid = pmksa->pmkid; wiphy_lock(&rdev->wiphy); switch (pmksa->cmd) { case IW_PMKSA_ADD: if (!rdev->ops->set_pmksa) { ret = -EOPNOTSUPP; break; } ret = rdev_set_pmksa(rdev, dev, &cfg_pmksa); break; case IW_PMKSA_REMOVE: if (!rdev->ops->del_pmksa) { ret = -EOPNOTSUPP; break; } ret = rdev_del_pmksa(rdev, dev, &cfg_pmksa); break; case IW_PMKSA_FLUSH: if (!rdev->ops->flush_pmksa) { ret = -EOPNOTSUPP; break; } ret = rdev_flush_pmksa(rdev, dev); break; default: ret = -EOPNOTSUPP; break; } wiphy_unlock(&rdev->wiphy); return ret; } static const iw_handler cfg80211_handlers[] = { IW_HANDLER(SIOCGIWNAME, cfg80211_wext_giwname), IW_HANDLER(SIOCSIWFREQ, cfg80211_wext_siwfreq), IW_HANDLER(SIOCGIWFREQ, cfg80211_wext_giwfreq), IW_HANDLER(SIOCSIWMODE, cfg80211_wext_siwmode), IW_HANDLER(SIOCGIWMODE, cfg80211_wext_giwmode), IW_HANDLER(SIOCGIWRANGE, cfg80211_wext_giwrange), IW_HANDLER(SIOCSIWAP, cfg80211_wext_siwap), IW_HANDLER(SIOCGIWAP, cfg80211_wext_giwap), IW_HANDLER(SIOCSIWMLME, cfg80211_wext_siwmlme), IW_HANDLER(SIOCSIWSCAN, cfg80211_wext_siwscan), IW_HANDLER(SIOCGIWSCAN, cfg80211_wext_giwscan), IW_HANDLER(SIOCSIWESSID, cfg80211_wext_siwessid), IW_HANDLER(SIOCGIWESSID, cfg80211_wext_giwessid), IW_HANDLER(SIOCSIWRATE, cfg80211_wext_siwrate), IW_HANDLER(SIOCGIWRATE, cfg80211_wext_giwrate), IW_HANDLER(SIOCSIWRTS, cfg80211_wext_siwrts), IW_HANDLER(SIOCGIWRTS, cfg80211_wext_giwrts), IW_HANDLER(SIOCSIWFRAG, cfg80211_wext_siwfrag), IW_HANDLER(SIOCGIWFRAG, cfg80211_wext_giwfrag), IW_HANDLER(SIOCSIWTXPOW, cfg80211_wext_siwtxpower), IW_HANDLER(SIOCGIWTXPOW, cfg80211_wext_giwtxpower), IW_HANDLER(SIOCSIWRETRY, cfg80211_wext_siwretry), IW_HANDLER(SIOCGIWRETRY, cfg80211_wext_giwretry), IW_HANDLER(SIOCSIWENCODE, cfg80211_wext_siwencode), IW_HANDLER(SIOCGIWENCODE, cfg80211_wext_giwencode), IW_HANDLER(SIOCSIWPOWER, cfg80211_wext_siwpower), IW_HANDLER(SIOCGIWPOWER, cfg80211_wext_giwpower), IW_HANDLER(SIOCSIWGENIE, cfg80211_wext_siwgenie), IW_HANDLER(SIOCSIWAUTH, cfg80211_wext_siwauth), IW_HANDLER(SIOCGIWAUTH, cfg80211_wext_giwauth), IW_HANDLER(SIOCSIWENCODEEXT, cfg80211_wext_siwencodeext), IW_HANDLER(SIOCSIWPMKSA, cfg80211_wext_siwpmksa), }; const struct iw_handler_def cfg80211_wext_handler = { .num_standard = ARRAY_SIZE(cfg80211_handlers), .standard = cfg80211_handlers, .get_wireless_stats = cfg80211_wireless_stats, }; |
35 32 197 44 10 7 1 2 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 | #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <linux/tc_act/tc_csum.h> #include <net/flow_offload.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; struct flow_offload_work { struct list_head list; enum flow_cls_command cmd; struct nf_flowtable *flowtable; struct flow_offload *flow; struct work_struct work; }; #define NF_FLOW_DISSECTOR(__match, __type, __field) \ (__match)->dissector.offset[__type] = \ offsetof(struct nf_flow_key, __field) static void nf_flow_rule_lwt_match(struct nf_flow_match *match, struct ip_tunnel_info *tun_info) { struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; unsigned long long enc_keys; if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX)) return; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id); mask->enc_key_id.keyid = 0xffffffff; enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL); if (ip_tunnel_info_af(tun_info) == AF_INET) { NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4); key->enc_ipv4.src = tun_info->key.u.ipv4.dst; key->enc_ipv4.dst = tun_info->key.u.ipv4.src; if (key->enc_ipv4.src) mask->enc_ipv4.src = 0xffffffff; if (key->enc_ipv4.dst) mask->enc_ipv4.dst = 0xffffffff; enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } else { memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst, sizeof(struct in6_addr)); memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src, sizeof(struct in6_addr)); if (memcmp(&key->enc_ipv6.src, &in6addr_any, sizeof(struct in6_addr))) memset(&mask->enc_ipv6.src, 0xff, sizeof(struct in6_addr)); if (memcmp(&key->enc_ipv6.dst, &in6addr_any, sizeof(struct in6_addr))) memset(&mask->enc_ipv6.dst, 0xff, sizeof(struct in6_addr)); enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } match->dissector.used_keys |= enc_keys; } static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key, struct flow_dissector_key_vlan *mask, u16 vlan_id, __be16 proto) { key->vlan_id = vlan_id; mask->vlan_id = VLAN_VID_MASK; key->vlan_tpid = proto; mask->vlan_tpid = 0xffff; } static int nf_flow_rule_match(struct nf_flow_match *match, const struct flow_offload_tuple *tuple, struct dst_entry *other_dst) { struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; struct ip_tunnel_info *tun_info; bool vlan_encap = false; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); if (other_dst && other_dst->lwtstate) { tun_info = lwt_tun_info(other_dst->lwtstate); nf_flow_rule_lwt_match(match, tun_info); } if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC) key->meta.ingress_ifindex = tuple->tc.iifidx; else key->meta.ingress_ifindex = tuple->iifidx; mask->meta.ingress_ifindex = 0xffffffff; if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) && tuple->encap[0].proto == htons(ETH_P_8021Q)) { NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan); nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, tuple->encap[0].id, tuple->encap[0].proto); vlan_encap = true; } if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) && tuple->encap[1].proto == htons(ETH_P_8021Q)) { if (vlan_encap) { NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN, cvlan); nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan, tuple->encap[1].id, tuple->encap[1].proto); } else { NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan); nf_flow_rule_vlan_match(&key->vlan, &mask->vlan, tuple->encap[1].id, tuple->encap[1].proto); } } switch (tuple->l3proto) { case AF_INET: key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; key->basic.n_proto = htons(ETH_P_IP); key->ipv4.src = tuple->src_v4.s_addr; mask->ipv4.src = 0xffffffff; key->ipv4.dst = tuple->dst_v4.s_addr; mask->ipv4.dst = 0xffffffff; break; case AF_INET6: key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; key->basic.n_proto = htons(ETH_P_IPV6); key->ipv6.src = tuple->src_v6; memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src)); key->ipv6.dst = tuple->dst_v6; memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst)); break; default: return -EOPNOTSUPP; } mask->control.addr_type = 0xffff; match->dissector.used_keys |= BIT_ULL(key->control.addr_type); mask->basic.n_proto = 0xffff; switch (tuple->l4proto) { case IPPROTO_TCP: key->tcp.flags = 0; mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16); match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP); break; case IPPROTO_UDP: case IPPROTO_GRE: break; default: return -EOPNOTSUPP; } key->basic.ip_proto = tuple->l4proto; mask->basic.ip_proto = 0xff; match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) | BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | BIT_ULL(FLOW_DISSECTOR_KEY_BASIC); switch (tuple->l4proto) { case IPPROTO_TCP: case IPPROTO_UDP: key->tp.src = tuple->src_port; mask->tp.src = 0xffff; key->tp.dst = tuple->dst_port; mask->tp.dst = 0xffff; match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS); break; } return 0; } static void flow_offload_mangle(struct flow_action_entry *entry, enum flow_action_mangle_base htype, u32 offset, const __be32 *value, const __be32 *mask) { entry->id = FLOW_ACTION_MANGLE; entry->mangle.htype = htype; entry->mangle.offset = offset; memcpy(&entry->mangle.mask, mask, sizeof(u32)); memcpy(&entry->mangle.val, value, sizeof(u32)); } static inline struct flow_action_entry * flow_action_entry_next(struct nf_flow_rule *flow_rule) { int i = flow_rule->rule->action.num_entries++; return &flow_rule->rule->action.entries[i]; } static int flow_offload_eth_src(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); const struct flow_offload_tuple *other_tuple, *this_tuple; struct net_device *dev = NULL; const unsigned char *addr; u32 mask, val; u16 val16; this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { case FLOW_OFFLOAD_XMIT_DIRECT: addr = this_tuple->out.h_source; break; case FLOW_OFFLOAD_XMIT_NEIGH: other_tuple = &flow->tuplehash[!dir].tuple; dev = dev_get_by_index(net, other_tuple->iifidx); if (!dev) return -ENOENT; addr = dev->dev_addr; break; default: return -EOPNOTSUPP; } mask = ~0xffff0000; memcpy(&val16, addr, 2); val = val16 << 16; flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, &val, &mask); mask = ~0xffffffff; memcpy(&val, addr + 2, 4); flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, &val, &mask); dev_put(dev); return 0; } static int flow_offload_eth_dst(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); const struct flow_offload_tuple *other_tuple, *this_tuple; const struct dst_entry *dst_cache; unsigned char ha[ETH_ALEN]; struct neighbour *n; const void *daddr; u32 mask, val; u8 nud_state; u16 val16; this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { case FLOW_OFFLOAD_XMIT_DIRECT: ether_addr_copy(ha, this_tuple->out.h_dest); break; case FLOW_OFFLOAD_XMIT_NEIGH: other_tuple = &flow->tuplehash[!dir].tuple; daddr = &other_tuple->src_v4; dst_cache = this_tuple->dst_cache; n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -ENOENT; read_lock_bh(&n->lock); nud_state = n->nud_state; ether_addr_copy(ha, n->ha); read_unlock_bh(&n->lock); neigh_release(n); if (!(nud_state & NUD_VALID)) return -ENOENT; break; default: return -EOPNOTSUPP; } mask = ~0xffffffff; memcpy(&val, ha, 4); flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0, &val, &mask); mask = ~0x0000ffff; memcpy(&val16, ha + 4, 2); val = val16; flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4, &val, &mask); return 0; } static void flow_offload_ipv4_snat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; offset = offsetof(struct iphdr, saddr); break; case FLOW_OFFLOAD_DIR_REPLY: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; offset = offsetof(struct iphdr, daddr); break; default: return; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); } static void flow_offload_ipv4_dnat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; offset = offsetof(struct iphdr, daddr); break; case FLOW_OFFLOAD_DIR_REPLY: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; offset = offsetof(struct iphdr, saddr); break; default: return; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); } static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; int i; for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, offset + i * sizeof(u32), &addr[i], mask); } } static void flow_offload_ipv6_snat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32; offset = offsetof(struct ipv6hdr, saddr); break; case FLOW_OFFLOAD_DIR_REPLY: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32; offset = offsetof(struct ipv6hdr, daddr); break; default: return; } flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static void flow_offload_ipv6_dnat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32; offset = offsetof(struct ipv6hdr, daddr); break; case FLOW_OFFLOAD_DIR_REPLY: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32; offset = offsetof(struct ipv6hdr, saddr); break; default: return; } flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; u8 type = 0; switch (protonum) { case IPPROTO_TCP: type = FLOW_ACT_MANGLE_HDR_TYPE_TCP; break; case IPPROTO_UDP: type = FLOW_ACT_MANGLE_HDR_TYPE_UDP; break; default: break; } return type; } static void flow_offload_port_snat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); offset = 0; /* offsetof(struct tcphdr, source); */ port = htonl(port << 16); mask = ~htonl(0xffff0000); break; case FLOW_OFFLOAD_DIR_REPLY: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port); offset = 0; /* offsetof(struct tcphdr, dest); */ port = htonl(port); mask = ~htonl(0xffff); break; default: return; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); } static void flow_offload_port_dnat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); offset = 0; /* offsetof(struct tcphdr, dest); */ port = htonl(port); mask = ~htonl(0xffff); break; case FLOW_OFFLOAD_DIR_REPLY: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port); offset = 0; /* offsetof(struct tcphdr, source); */ port = htonl(port << 16); mask = ~htonl(0xffff0000); break; default: return; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); } static void flow_offload_ipv4_checksum(struct net *net, const struct flow_offload *flow, struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; struct flow_action_entry *entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; switch (protonum) { case IPPROTO_TCP: entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP; break; case IPPROTO_UDP: entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; break; } } static void flow_offload_redirect(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple, *other_tuple; struct flow_action_entry *entry; struct net_device *dev; int ifindex; this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { case FLOW_OFFLOAD_XMIT_DIRECT: this_tuple = &flow->tuplehash[dir].tuple; ifindex = this_tuple->out.hw_ifidx; break; case FLOW_OFFLOAD_XMIT_NEIGH: other_tuple = &flow->tuplehash[!dir].tuple; ifindex = other_tuple->iifidx; break; default: return; } dev = dev_get_by_index(net, ifindex); if (!dev) return; entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_REDIRECT; entry->dev = dev; } static void flow_offload_encap_tunnel(const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; struct dst_entry *dst; this_tuple = &flow->tuplehash[dir].tuple; if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) return; dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { struct ip_tunnel_info *tun_info; tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_TUNNEL_ENCAP; entry->tunnel = tun_info; } } } static void flow_offload_decap_tunnel(const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; struct dst_entry *dst; other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) return; dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { struct ip_tunnel_info *tun_info; tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_TUNNEL_DECAP; } } } static int nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; const struct flow_offload_tuple *tuple; int i; flow_offload_decap_tunnel(flow, dir, flow_rule); flow_offload_encap_tunnel(flow, dir, flow_rule); if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; tuple = &flow->tuplehash[dir].tuple; for (i = 0; i < tuple->encap_num; i++) { struct flow_action_entry *entry; if (tuple->in_vlan_ingress & BIT(i)) continue; if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_VLAN_POP; } } other_tuple = &flow->tuplehash[!dir].tuple; for (i = 0; i < other_tuple->encap_num; i++) { struct flow_action_entry *entry; if (other_tuple->in_vlan_ingress & BIT(i)) continue; entry = flow_action_entry_next(flow_rule); switch (other_tuple->encap[i].proto) { case htons(ETH_P_PPP_SES): entry->id = FLOW_ACTION_PPPOE_PUSH; entry->pppoe.sid = other_tuple->encap[i].id; break; case htons(ETH_P_8021Q): entry->id = FLOW_ACTION_VLAN_PUSH; entry->vlan.vid = other_tuple->encap[i].id; entry->vlan.proto = other_tuple->encap[i].proto; break; } } return 0; } int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { flow_offload_ipv4_snat(net, flow, dir, flow_rule); flow_offload_port_snat(net, flow, dir, flow_rule); } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { flow_offload_ipv4_dnat(net, flow, dir, flow_rule); flow_offload_port_dnat(net, flow, dir, flow_rule); } if (test_bit(NF_FLOW_SNAT, &flow->flags) || test_bit(NF_FLOW_DNAT, &flow->flags)) flow_offload_ipv4_checksum(net, flow, flow_rule); flow_offload_redirect(net, flow, dir, flow_rule); return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4); int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0) return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { flow_offload_ipv6_snat(net, flow, dir, flow_rule); flow_offload_port_snat(net, flow, dir, flow_rule); } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { flow_offload_ipv6_dnat(net, flow, dir, flow_rule); flow_offload_port_dnat(net, flow, dir, flow_rule); } flow_offload_redirect(net, flow, dir, flow_rule); return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); #define NF_FLOW_RULE_ACTION_MAX 16 static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { const struct nf_flowtable *flowtable = offload->flowtable; const struct flow_offload_tuple *tuple, *other_tuple; struct flow_offload *flow = offload->flow; struct dst_entry *other_dst = NULL; struct nf_flow_rule *flow_rule; int err = -ENOMEM; flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); if (!flow_rule) goto err_flow; flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX); if (!flow_rule->rule) goto err_flow_rule; flow_rule->rule->match.dissector = &flow_rule->match.dissector; flow_rule->rule->match.mask = &flow_rule->match.mask; flow_rule->rule->match.key = &flow_rule->match.key; tuple = &flow->tuplehash[dir].tuple; other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) other_dst = other_tuple->dst_cache; err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); if (err < 0) goto err_flow_match; flow_rule->rule->action.num_entries = 0; if (flowtable->type->action(net, flow, dir, flow_rule) < 0) goto err_flow_match; return flow_rule; err_flow_match: kfree(flow_rule->rule); err_flow_rule: kfree(flow_rule); err_flow: return NULL; } static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry; int i; for (i = 0; i < flow_rule->rule->action.num_entries; i++) { entry = &flow_rule->rule->action.entries[i]; if (entry->id != FLOW_ACTION_REDIRECT) continue; dev_put(entry->dev); } kfree(flow_rule->rule); kfree(flow_rule); } static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[]) { int i; for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++) __nf_flow_offload_destroy(flow_rule[i]); } static int nf_flow_offload_alloc(const struct flow_offload_work *offload, struct nf_flow_rule *flow_rule[]) { struct net *net = read_pnet(&offload->flowtable->net); flow_rule[0] = nf_flow_offload_rule_alloc(net, offload, FLOW_OFFLOAD_DIR_ORIGINAL); if (!flow_rule[0]) return -ENOMEM; flow_rule[1] = nf_flow_offload_rule_alloc(net, offload, FLOW_OFFLOAD_DIR_REPLY); if (!flow_rule[1]) { __nf_flow_offload_destroy(flow_rule[0]); return -ENOMEM; } return 0; } static void nf_flow_offload_init(struct flow_cls_offload *cls_flow, __be16 proto, int priority, enum flow_cls_command cmd, const struct flow_offload_tuple *tuple, struct netlink_ext_ack *extack) { cls_flow->common.protocol = proto; cls_flow->common.prio = priority; cls_flow->common.extack = extack; cls_flow->command = cmd; cls_flow->cookie = (unsigned long)tuple; } static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct flow_offload *flow, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir, int priority, int cmd, struct flow_stats *stats, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; struct flow_block_cb *block_cb; struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; nf_flow_offload_init(&cls_flow, proto, priority, cmd, &flow->tuplehash[dir].tuple, &extack); if (cmd == FLOW_CLS_REPLACE) cls_flow.rule = flow_rule->rule; down_read(&flowtable->flow_block_lock); list_for_each_entry(block_cb, block_cb_list, list) { err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); if (err < 0) continue; i++; } up_read(&flowtable->flow_block_lock); if (cmd == FLOW_CLS_STATS) memcpy(stats, &cls_flow.stats, sizeof(*stats)); return i; } static int flow_offload_tuple_add(struct flow_offload_work *offload, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir) { return nf_flow_offload_tuple(offload->flowtable, offload->flow, flow_rule, dir, offload->flowtable->priority, FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, offload->flowtable->priority, FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } static int flow_offload_rule_add(struct flow_offload_work *offload, struct nf_flow_rule *flow_rule[]) { int ok_count = 0; ok_count += flow_offload_tuple_add(offload, flow_rule[0], FLOW_OFFLOAD_DIR_ORIGINAL); if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags)) ok_count += flow_offload_tuple_add(offload, flow_rule[1], FLOW_OFFLOAD_DIR_REPLY); if (ok_count == 0) return -ENOENT; return 0; } static void flow_offload_work_add(struct flow_offload_work *offload) { struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX]; int err; err = nf_flow_offload_alloc(offload, flow_rule); if (err < 0) return; err = flow_offload_rule_add(offload, flow_rule); if (err < 0) goto out; set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); out: nf_flow_offload_destroy(flow_rule); } static void flow_offload_work_del(struct flow_offload_work *offload) { clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL); if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags)) flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY); set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags); } static void flow_offload_tuple_stats(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir, struct flow_stats *stats) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, offload->flowtable->priority, FLOW_CLS_STATS, stats, &offload->flowtable->flow_block.cb_list); } static void flow_offload_work_stats(struct flow_offload_work *offload) { struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {}; u64 lastused; flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]); if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags)) flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]); lastused = max_t(u64, stats[0].lastused, stats[1].lastused); offload->flow->timeout = max_t(u64, offload->flow->timeout, lastused + flow_offload_get_timeout(offload->flow)); if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) { if (stats[0].pkts) nf_ct_acct_add(offload->flow->ct, FLOW_OFFLOAD_DIR_ORIGINAL, stats[0].pkts, stats[0].bytes); if (stats[1].pkts) nf_ct_acct_add(offload->flow->ct, FLOW_OFFLOAD_DIR_REPLY, stats[1].pkts, stats[1].bytes); } } static void flow_offload_work_handler(struct work_struct *work) { struct flow_offload_work *offload; struct net *net; offload = container_of(work, struct flow_offload_work, work); net = read_pnet(&offload->flowtable->net); switch (offload->cmd) { case FLOW_CLS_REPLACE: flow_offload_work_add(offload); NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add); break; case FLOW_CLS_DESTROY: flow_offload_work_del(offload); NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del); break; case FLOW_CLS_STATS: flow_offload_work_stats(offload); NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats); break; default: WARN_ON_ONCE(1); } clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags); kfree(offload); } static void flow_offload_queue_work(struct flow_offload_work *offload) { struct net *net = read_pnet(&offload->flowtable->net); if (offload->cmd == FLOW_CLS_REPLACE) { NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add); queue_work(nf_flow_offload_add_wq, &offload->work); } else if (offload->cmd == FLOW_CLS_DESTROY) { NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del); queue_work(nf_flow_offload_del_wq, &offload->work); } else { NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats); queue_work(nf_flow_offload_stats_wq, &offload->work); } } static struct flow_offload_work * nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, struct flow_offload *flow, unsigned int cmd) { struct flow_offload_work *offload; if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags)) return NULL; offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC); if (!offload) { clear_bit(NF_FLOW_HW_PENDING, &flow->flags); return NULL; } offload->cmd = cmd; offload->flow = flow; offload->flowtable = flowtable; INIT_WORK(&offload->work, flow_offload_work_handler); return offload; } void nf_flow_offload_add(struct nf_flowtable *flowtable, struct flow_offload *flow) { struct flow_offload_work *offload; offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE); if (!offload) return; flow_offload_queue_work(offload); } void nf_flow_offload_del(struct nf_flowtable *flowtable, struct flow_offload *flow) { struct flow_offload_work *offload; offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY); if (!offload) return; set_bit(NF_FLOW_HW_DYING, &flow->flags); flow_offload_queue_work(offload); } void nf_flow_offload_stats(struct nf_flowtable *flowtable, struct flow_offload *flow) { struct flow_offload_work *offload; __s32 delta; delta = nf_flow_timeout_delta(flow->timeout); if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10)) return; offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS); if (!offload) return; flow_offload_queue_work(offload); } void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) { flush_workqueue(nf_flow_offload_del_wq); nf_flow_table_gc_run(flowtable); } } void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) { flush_workqueue(nf_flow_offload_add_wq); flush_workqueue(nf_flow_offload_del_wq); flush_workqueue(nf_flow_offload_stats_wq); } } static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_offload *bo, enum flow_block_command cmd) { struct flow_block_cb *block_cb, *next; int err = 0; down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); break; case FLOW_BLOCK_UNBIND: list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { list_del(&block_cb->list); flow_block_cb_free(block_cb); } break; default: WARN_ON_ONCE(1); err = -EOPNOTSUPP; } up_write(&flowtable->flow_block_lock); return err; } static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, struct net *net, enum flow_block_command cmd, struct nf_flowtable *flowtable, struct netlink_ext_ack *extack) { memset(bo, 0, sizeof(*bo)); bo->net = net; bo->block = &flowtable->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; bo->cb_list_head = &flowtable->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb) { struct nf_flowtable *flowtable = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; nf_flow_table_gc_cleanup(flowtable, dev); down_write(&flowtable->flow_block_lock); list_del(&block_cb->list); list_del(&block_cb->driver_list); flow_block_cb_free(block_cb); up_write(&flowtable->flow_block_lock); } static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd, struct netlink_ext_ack *extack) { nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo, nf_flow_table_indr_cleanup); } static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd, struct netlink_ext_ack *extack) { int err; nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); up_write(&flowtable->flow_block_lock); if (err < 0) return err; return 0; } int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd) { struct netlink_ext_ack extack = {}; struct flow_block_offload bo; int err; if (!nf_flowtable_hw_offload(flowtable)) return 0; if (dev->netdev_ops->ndo_setup_tc) err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); else err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, &extack); if (err < 0) return err; return nf_flow_table_block_setup(flowtable, &bo, cmd); } EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); int nf_flow_table_offload_init(void) { nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add", WQ_UNBOUND | WQ_SYSFS, 0); if (!nf_flow_offload_add_wq) return -ENOMEM; nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del", WQ_UNBOUND | WQ_SYSFS, 0); if (!nf_flow_offload_del_wq) goto err_del_wq; nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats", WQ_UNBOUND | WQ_SYSFS, 0); if (!nf_flow_offload_stats_wq) goto err_stats_wq; return 0; err_stats_wq: destroy_workqueue(nf_flow_offload_del_wq); err_del_wq: destroy_workqueue(nf_flow_offload_add_wq); return -ENOMEM; } void nf_flow_table_offload_exit(void) { destroy_workqueue(nf_flow_offload_add_wq); destroy_workqueue(nf_flow_offload_del_wq); destroy_workqueue(nf_flow_offload_stats_wq); } |
2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 | // SPDX-License-Identifier: GPL-2.0 /* * Central processing for nfsd. * * Authors: Olaf Kirch (okir@monad.swb.de) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ #include <linux/sched/signal.h> #include <linux/freezer.h> #include <linux/module.h> #include <linux/fs_struct.h> #include <linux/swap.h> #include <linux/siphash.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> #include <linux/seq_file.h> #include <linux/inetdevice.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <net/net_namespace.h> #include "nfsd.h" #include "cache.h" #include "vfs.h" #include "netns.h" #include "filecache.h" #include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_SVC extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static int nfsd_acl_rpcbind_set(struct net *, const struct svc_program *, u32, int, unsigned short, unsigned short); static __be32 nfsd_acl_init_request(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); #endif static int nfsd_rpcbind_set(struct net *, const struct svc_program *, u32, int, unsigned short, unsigned short); static __be32 nfsd_init_request(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); /* * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in * nfsctl.c. In particular: * * user_recovery_dirname * user_lease_time * nfsd_versions */ DEFINE_MUTEX(nfsd_mutex); /* * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. * nfsd_drc_max_pages limits the total amount of memory available for * version 4.1 DRC caches. * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { # if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, # endif # if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, # endif }; #define NFSD_ACL_MINVERS 2 #define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version) static struct svc_program nfsd_acl_program = { .pg_prog = NFS_ACL_PROGRAM, .pg_nvers = NFSD_ACL_NRVERS, .pg_vers = nfsd_acl_version, .pg_name = "nfsacl", .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, .pg_init_request = nfsd_acl_init_request, .pg_rpcbind_set = nfsd_acl_rpcbind_set, }; static struct svc_stat nfsd_acl_svcstats = { .program = &nfsd_acl_program, }; #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { #if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, #endif [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, #endif }; #define NFSD_MINVERS 2 #define NFSD_NRVERS ARRAY_SIZE(nfsd_version) struct svc_program nfsd_program = { #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) .pg_next = &nfsd_acl_program, #endif .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ .pg_init_request = nfsd_init_request, .pg_rpcbind_set = nfsd_rpcbind_set, }; static bool nfsd_support_version(int vers) { if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS) return nfsd_version[vers] != NULL; return false; } static bool * nfsd_alloc_versions(void) { bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL); unsigned i; if (vers) { /* All compiled versions are enabled by default */ for (i = 0; i < NFSD_NRVERS; i++) vers[i] = nfsd_support_version(i); } return vers; } static bool * nfsd_alloc_minorversions(void) { bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1, sizeof(bool), GFP_KERNEL); unsigned i; if (vers) { /* All minor versions are enabled by default */ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) vers[i] = nfsd_support_version(4); } return vers; } void nfsd_netns_free_versions(struct nfsd_net *nn) { kfree(nn->nfsd_versions); kfree(nn->nfsd4_minorversions); nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; } static void nfsd_netns_init_versions(struct nfsd_net *nn) { if (!nn->nfsd_versions) { nn->nfsd_versions = nfsd_alloc_versions(); nn->nfsd4_minorversions = nfsd_alloc_minorversions(); if (!nn->nfsd_versions || !nn->nfsd4_minorversions) nfsd_netns_free_versions(nn); } } int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) return 0; switch(change) { case NFSD_SET: if (nn->nfsd_versions) nn->nfsd_versions[vers] = nfsd_support_version(vers); break; case NFSD_CLEAR: nfsd_netns_init_versions(nn); if (nn->nfsd_versions) nn->nfsd_versions[vers] = false; break; case NFSD_TEST: if (nn->nfsd_versions) return nn->nfsd_versions[vers]; fallthrough; case NFSD_AVAIL: return nfsd_support_version(vers); } return 0; } static void nfsd_adjust_nfsd_versions4(struct nfsd_net *nn) { unsigned i; for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) { if (nn->nfsd4_minorversions[i]) return; } nfsd_vers(nn, 4, NFSD_CLEAR); } int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change) { if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: if (nn->nfsd4_minorversions) { nfsd_vers(nn, 4, NFSD_SET); nn->nfsd4_minorversions[minorversion] = nfsd_vers(nn, 4, NFSD_TEST); } break; case NFSD_CLEAR: nfsd_netns_init_versions(nn); if (nn->nfsd4_minorversions) { nn->nfsd4_minorversions[minorversion] = false; nfsd_adjust_nfsd_versions4(nn); } break; case NFSD_TEST: if (nn->nfsd4_minorversions) return nn->nfsd4_minorversions[minorversion]; return nfsd_vers(nn, 4, NFSD_TEST); case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION && nfsd_vers(nn, 4, NFSD_AVAIL); } return 0; } /* * Maximum number of nfsd processes */ #define NFSD_MAXSERVS 8192 int nfsd_nrthreads(struct net *net) { int rv = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) rv = nn->nfsd_serv->sv_nrthreads; mutex_unlock(&nfsd_mutex); return rv; } static int nfsd_init_socks(struct net *net, const struct cred *cred) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; return 0; } static int nfsd_users = 0; static int nfsd_startup_generic(void) { int ret; if (nfsd_users++) return 0; ret = nfsd_file_cache_init(); if (ret) goto dec_users; ret = nfs4_state_start(); if (ret) goto out_file_cache; return 0; out_file_cache: nfsd_file_cache_shutdown(); dec_users: nfsd_users--; return ret; } static void nfsd_shutdown_generic(void) { if (--nfsd_users) return; nfs4_state_shutdown(); nfsd_file_cache_shutdown(); } static bool nfsd_needs_lockd(struct nfsd_net *nn) { return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } /** * nfsd_copy_write_verifier - Atomically copy a write verifier * @verf: buffer in which to receive the verifier cookie * @nn: NFS net namespace * * This function provides a wait-free mechanism for copying the * namespace's write verifier without tearing it. */ void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { unsigned int seq; do { seq = read_seqbegin(&nn->writeverf_lock); memcpy(verf, nn->writeverf, sizeof(nn->writeverf)); } while (read_seqretry(&nn->writeverf_lock, seq)); } static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) { struct timespec64 now; u64 verf; /* * Because the time value is hashed, y2038 time_t overflow * is irrelevant in this usage. */ ktime_get_raw_ts64(&now); verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); } /** * nfsd_reset_write_verifier - Generate a new write verifier * @nn: NFS net namespace * * This function updates the ->writeverf field of @nn. This field * contains an opaque cookie that, according to Section 18.32.3 of * RFC 8881, "the client can use to determine whether a server has * changed instance state (e.g., server restart) between a call to * WRITE and a subsequent call to either WRITE or COMMIT. This * cookie MUST be unchanged during a single instance of the NFSv4.1 * server and MUST be unique between instances of the NFSv4.1 * server." */ void nfsd_reset_write_verifier(struct nfsd_net *nn) { write_seqlock(&nn->writeverf_lock); nfsd_reset_write_verifier_locked(nn); write_sequnlock(&nn->writeverf_lock); } /* * Crank up a set of per-namespace resources for a new NFSD instance, * including lockd, a duplicate reply cache, an open file cache * instance, and a cache of NFSv4 state objects. */ static int nfsd_startup_net(struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; if (nn->nfsd_net_up) return 0; ret = nfsd_startup_generic(); if (ret) return ret; ret = nfsd_init_socks(net, cred); if (ret) goto out_socks; if (nfsd_needs_lockd(nn) && !nn->lockd_up) { ret = lockd_up(net, cred); if (ret) goto out_socks; nn->lockd_up = true; } ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; ret = nfsd_reply_cache_init(nn); if (ret) goto out_filecache; ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_init_umount_work(nn); #endif nn->nfsd_net_up = true; return 0; out_reply_cache: nfsd_reply_cache_shutdown(nn); out_filecache: nfsd_file_cache_shutdown_net(net); out_lockd: if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; } out_socks: nfsd_shutdown_generic(); return ret; } static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs4_state_shutdown_net(net); nfsd_reply_cache_shutdown(nn); nfsd_file_cache_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; } nn->nfsd_net_up = false; nfsd_shutdown_generic(); } static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; } static struct notifier_block nfsd_inetaddr_notifier = { .notifier_call = nfsd_inetaddr_event, }; #if IS_ENABLED(CONFIG_IPV6) static int nfsd_inet6addr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct net_device *dev = ifa->idev->dev; struct net *net = dev_net(dev); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; } static struct notifier_block nfsd_inet6addr_notifier = { .notifier_call = nfsd_inet6addr_event, }; #endif /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); /** * nfsd_destroy_serv - tear down NFSD's svc_serv for a namespace * @net: network namespace the NFS service is associated with */ void nfsd_destroy_serv(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; spin_unlock(&nfsd_notifier_lock); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } svc_xprt_destroy_all(serv, net); /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_destroy_serv will be run before any of this * other initialization has been done except the rpcb information. */ svc_rpcb_cleanup(serv, net); if (!nn->nfsd_net_up) return; nfsd_shutdown_net(net); nfsd_export_flush(net); svc_destroy(&serv); } void nfsd_reset_versions(struct nfsd_net *nn) { int i; for (i = 0; i < NFSD_NRVERS; i++) if (nfsd_vers(nn, i, NFSD_TEST)) return; for (i = 0; i < NFSD_NRVERS; i++) if (i != 4) nfsd_vers(nn, i, NFSD_SET); else { int minor = 0; while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0) minor++; } } /* * Each session guarantees a negotiated per slot memory cache for replies * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated * NFSv4.1 server might want to use more memory for a DRC than a machine * with mutiple services. * * Impose a hard limit on the number of pages for the DRC which varies * according to the machines free pages. This is of course only a default. * * For now this is a #defined shift which could be under admin control * in the future. */ static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 7 nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } static int nfsd_get_default_max_blksize(void) { struct sysinfo i; unsigned long long target; unsigned long ret; si_meminfo(&i); target = (i.totalram - i.totalhigh) << PAGE_SHIFT; /* * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig * machines, but only uses 32K on 128M machines. Bottom out at * 8K on 32M and smaller. Of course, this is only a default. */ target >>= 12; ret = NFSSVC_MAXBLKSIZE; while (ret > target && ret >= 8*1024*2) ret /= 2; return ret; } void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; mutex_lock(&nfsd_mutex); serv = nn->nfsd_serv; if (serv == NULL) { mutex_unlock(&nfsd_mutex); return; } /* Kill outstanding nfsd threads */ svc_set_num_threads(serv, NULL, 0); nfsd_destroy_serv(net); mutex_unlock(&nfsd_mutex); } bool i_am_nfsd(void) { return kthread_func(current) == nfsd; } int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) return 0; if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); if (serv == NULL) return -ENOMEM; serv->sv_maxconn = nn->max_connections; error = svc_bind(serv, net); if (error < 0) { svc_destroy(&serv); return error; } spin_lock(&nfsd_notifier_lock); nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = serv; spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ if (atomic_inc_return(&nfsd_notifier_refcount) == 1) { register_inetaddr_notifier(&nfsd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } nfsd_reset_write_verifier(nn); return 0; } int nfsd_nrpools(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (nn->nfsd_serv == NULL) return 0; else return nn->nfsd_serv->sv_nrpools; } int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; int i; if (serv) for (i = 0; i < serv->sv_nrpools && i < n; i++) nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); return 0; } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; int tot = 0; int err = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv == NULL || n <= 0) return 0; if (n > nn->nfsd_serv->sv_nrpools) n = nn->nfsd_serv->sv_nrpools; /* enforce a global maximum number of threads */ tot = 0; for (i = 0; i < n; i++) { nthreads[i] = min(nthreads[i], NFSD_MAXSERVS); tot += nthreads[i]; } if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i = 0; i < n && tot > 0; i++) { int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } for (i = 0; i < n && tot > 0; i++) { nthreads[i]--; tot--; } } /* * There must always be a thread in pool 0; the admin * can't shut down NFS completely using pool_threads. */ if (nthreads[0] == 0) nthreads[0] = 1; /* apply the new numbers */ for (i = 0; i < n; i++) { err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } return err; } /* * Adjust the number of threads and return the new number of threads. * This is also the function that starts the server if necessary, if * this is the first time nrservs is nonzero. */ int nfsd_svc(int nrservs, struct net *net, const struct cred *cred) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); nrservs = max(nrservs, 0); nrservs = min(nrservs, NFSD_MAXSERVS); error = 0; if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; strscpy(nn->nfsd_name, utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); if (error) goto out; serv = nn->nfsd_serv; error = nfsd_startup_net(net, cred); if (error) goto out_put; error = svc_set_num_threads(serv, NULL, nrservs); if (error) goto out_put; error = serv->sv_nrthreads; out_put: if (serv->sv_nrthreads == 0) nfsd_destroy_serv(net); out: mutex_unlock(&nfsd_mutex); return error; } #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static bool nfsd_support_acl_version(int vers) { if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS) return nfsd_acl_version[vers] != NULL; return false; } static int nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp, u32 version, int family, unsigned short proto, unsigned short port) { if (!nfsd_support_acl_version(version) || !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) return 0; return svc_generic_rpcbind_set(net, progp, version, family, proto, port); } static __be32 nfsd_acl_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *ret) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); int i; if (likely(nfsd_support_acl_version(rqstp->rq_vers) && nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) return svc_generic_init_request(rqstp, progp, ret); ret->mismatch.lovers = NFSD_ACL_NRVERS; for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) { if (nfsd_support_acl_version(rqstp->rq_vers) && nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.lovers = i; break; } } if (ret->mismatch.lovers == NFSD_ACL_NRVERS) return rpc_prog_unavail; ret->mismatch.hivers = NFSD_ACL_MINVERS; for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) { if (nfsd_support_acl_version(rqstp->rq_vers) && nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.hivers = i; break; } } return rpc_prog_mismatch; } #endif static int nfsd_rpcbind_set(struct net *net, const struct svc_program *progp, u32 version, int family, unsigned short proto, unsigned short port) { if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) return 0; return svc_generic_rpcbind_set(net, progp, version, family, proto, port); } static __be32 nfsd_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *ret) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); int i; if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) return svc_generic_init_request(rqstp, progp, ret); ret->mismatch.lovers = NFSD_NRVERS; for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.lovers = i; break; } } if (ret->mismatch.lovers == NFSD_NRVERS) return rpc_prog_unavail; ret->mismatch.hivers = NFSD_MINVERS; for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.hivers = i; break; } } return rpc_prog_mismatch; } /* * This is the NFS server kernel thread */ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); struct net *net = perm_sock->xpt_net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. */ if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } current->fs->umask = 0; atomic_inc(&nfsdstats.th_cnt); set_freezable(); /* * The main request loop */ while (!svc_thread_should_stop(rqstp)) { /* Update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); } atomic_dec(&nfsdstats.th_cnt); out: /* Release the thread */ svc_exit_thread(rqstp); return 0; } /** * nfsd_dispatch - Process an NFS or NFSACL Request * @rqstp: incoming request * * This RPC dispatcher integrates the NFS server's duplicate reply cache. * * Return values: * %0: Processing complete; do not send a Reply * %1: Processing complete; send Reply in rqstp->rq_res */ int nfsd_dispatch(struct svc_rqst *rqstp) { const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; struct nfsd_cacherep *rp; unsigned int start, len; __be32 *nfs_reply; /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) */ rqstp->rq_cachetype = proc->pc_cachetype; /* * ->pc_decode advances the argument stream past the NFS * Call header, so grab the header's starting location and * size now for the call to nfsd_cache_lookup(). */ start = xdr_stream_pos(&rqstp->rq_arg_stream); len = xdr_stream_remaining(&rqstp->rq_arg_stream); if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; /* * Release rq_status_counter setting it to an odd value after the rpc * request has been properly parsed. rq_status_counter is used to * notify the consumers if the rqstp fields are stable * (rq_status_counter is odd) or not meaningful (rq_status_counter * is even). */ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); rp = NULL; switch (nfsd_cache_lookup(rqstp, start, len, &rp)) { case RC_DOIT: break; case RC_REPLY: goto out_cached_reply; case RC_DROPIT: goto out_dropit; } nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; /* * Release rq_status_counter setting it to an even value after the rpc * request has been properly processed. */ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply); out_cached_reply: return 1; out_decode_err: trace_nfsd_garbage_args_err(rqstp); *statp = rpc_garbage_args; return 1; out_update_drop: nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL); out_dropit: return 0; out_encode_err: trace_nfsd_cant_encode_err(rqstp); nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL); *statp = rpc_system_err; return 1; } /** * nfssvc_decode_voidarg - Decode void arguments * @rqstp: Server RPC transaction context * @xdr: XDR stream positioned at arguments to decode * * Return values: * %false: Arguments were not valid * %true: Decoding was successful */ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return true; } /** * nfssvc_encode_voidres - Encode void results * @rqstp: Server RPC transaction context * @xdr: XDR stream into which to encode results * * Return values: * %false: Local error while encoding * %true: Encoding was successful */ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return true; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) { struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); return svc_pool_stats_open(&nn->nfsd_info, file); } |
155 155 134 134 2 2 2 2 2 2 137 136 111 63 63 63 62 65 65 22 22 22 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 | /* * linux/drivers/video/fbmem.c * * Copyright (C) 1994 Martin Schaller * * 2001 - Documented with DocBook * - Brad Douglas <brad@neruo.com> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. */ #include <linux/console.h> #include <linux/export.h> #include <linux/fb.h> #include <linux/fbcon.h> #include <video/nomodeset.h> #include "fb_internal.h" /* * Frame buffer device initialization and setup routines */ #define FBPIXMAPSIZE (1024 * 8) struct class *fb_class; DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; #define for_each_registered_fb(i) \ for (i = 0; i < FB_MAX; i++) \ if (!registered_fb[i]) {} else struct fb_info *get_fb_info(unsigned int idx) { struct fb_info *fb_info; if (idx >= FB_MAX) return ERR_PTR(-ENODEV); mutex_lock(®istration_lock); fb_info = registered_fb[idx]; if (fb_info) refcount_inc(&fb_info->count); mutex_unlock(®istration_lock); return fb_info; } void put_fb_info(struct fb_info *fb_info) { if (!refcount_dec_and_test(&fb_info->count)) return; if (fb_info->fbops->fb_destroy) fb_info->fbops->fb_destroy(fb_info); } /* * Helpers */ int fb_get_color_depth(struct fb_var_screeninfo *var, struct fb_fix_screeninfo *fix) { int depth = 0; if (fix->visual == FB_VISUAL_MONO01 || fix->visual == FB_VISUAL_MONO10) depth = 1; else { if (var->green.length == var->blue.length && var->green.length == var->red.length && var->green.offset == var->blue.offset && var->green.offset == var->red.offset) depth = var->green.length; else depth = var->green.length + var->red.length + var->blue.length; } return depth; } EXPORT_SYMBOL(fb_get_color_depth); /* * Data padding functions. */ void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height) { __fb_pad_aligned_buffer(dst, d_pitch, src, s_pitch, height); } EXPORT_SYMBOL(fb_pad_aligned_buffer); void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod) { u8 mask = (u8) (0xfff << shift_high), tmp; int i, j; for (i = height; i--; ) { for (j = 0; j < idx; j++) { tmp = dst[j]; tmp &= mask; tmp |= *src >> shift_low; dst[j] = tmp; tmp = *src << shift_high; dst[j+1] = tmp; src++; } tmp = dst[idx]; tmp &= mask; tmp |= *src >> shift_low; dst[idx] = tmp; if (shift_high < mod) { tmp = *src << shift_high; dst[idx+1] = tmp; } src++; dst += d_pitch; } } EXPORT_SYMBOL(fb_pad_unaligned_buffer); /* * we need to lock this section since fb_cursor * may use fb_imageblit() */ char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size) { u32 align = buf->buf_align - 1, offset; char *addr = buf->addr; /* If IO mapped, we need to sync before access, no sharing of * the pixmap is done */ if (buf->flags & FB_PIXMAP_IO) { if (info->fbops->fb_sync && (buf->flags & FB_PIXMAP_SYNC)) info->fbops->fb_sync(info); return addr; } /* See if we fit in the remaining pixmap space */ offset = buf->offset + align; offset &= ~align; if (offset + size > buf->size) { /* We do not fit. In order to be able to re-use the buffer, * we must ensure no asynchronous DMA'ing or whatever operation * is in progress, we sync for that. */ if (info->fbops->fb_sync && (buf->flags & FB_PIXMAP_SYNC)) info->fbops->fb_sync(info); offset = 0; } buf->offset = offset + size; addr += offset; return addr; } EXPORT_SYMBOL(fb_get_buffer_offset); int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var) { struct fb_fix_screeninfo *fix = &info->fix; unsigned int yres = info->var.yres; int err = 0; if (var->yoffset > 0) { if (var->vmode & FB_VMODE_YWRAP) { if (!fix->ywrapstep || (var->yoffset % fix->ywrapstep)) err = -EINVAL; else yres = 0; } else if (!fix->ypanstep || (var->yoffset % fix->ypanstep)) err = -EINVAL; } if (var->xoffset > 0 && (!fix->xpanstep || (var->xoffset % fix->xpanstep))) err = -EINVAL; if (err || !info->fbops->fb_pan_display || var->yoffset > info->var.yres_virtual - yres || var->xoffset > info->var.xres_virtual - info->var.xres) return -EINVAL; if ((err = info->fbops->fb_pan_display(var, info))) return err; info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; if (var->vmode & FB_VMODE_YWRAP) info->var.vmode |= FB_VMODE_YWRAP; else info->var.vmode &= ~FB_VMODE_YWRAP; return 0; } EXPORT_SYMBOL(fb_pan_display); static int fb_check_caps(struct fb_info *info, struct fb_var_screeninfo *var, u32 activate) { struct fb_blit_caps caps, fbcaps; int err = 0; memset(&caps, 0, sizeof(caps)); memset(&fbcaps, 0, sizeof(fbcaps)); caps.flags = (activate & FB_ACTIVATE_ALL) ? 1 : 0; fbcon_get_requirement(info, &caps); info->fbops->fb_get_caps(info, &fbcaps, var); if (((fbcaps.x ^ caps.x) & caps.x) || ((fbcaps.y ^ caps.y) & caps.y) || (fbcaps.len < caps.len)) err = -EINVAL; return err; } int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) { int ret = 0; u32 activate; struct fb_var_screeninfo old_var; struct fb_videomode mode; struct fb_event event; u32 unused; if (var->activate & FB_ACTIVATE_INV_MODE) { struct fb_videomode mode1, mode2; fb_var_to_videomode(&mode1, var); fb_var_to_videomode(&mode2, &info->var); /* make sure we don't delete the videomode of current var */ ret = fb_mode_is_equal(&mode1, &mode2); if (!ret) { ret = fbcon_mode_deleted(info, &mode1); if (!ret) fb_delete_videomode(&mode1, &info->modelist); } return ret ? -EINVAL : 0; } if (!(var->activate & FB_ACTIVATE_FORCE) && !memcmp(&info->var, var, sizeof(struct fb_var_screeninfo))) return 0; activate = var->activate; /* When using FOURCC mode, make sure the red, green, blue and * transp fields are set to 0. */ if ((info->fix.capabilities & FB_CAP_FOURCC) && var->grayscale > 1) { if (var->red.offset || var->green.offset || var->blue.offset || var->transp.offset || var->red.length || var->green.length || var->blue.length || var->transp.length || var->red.msb_right || var->green.msb_right || var->blue.msb_right || var->transp.msb_right) return -EINVAL; } if (!info->fbops->fb_check_var) { *var = info->var; return 0; } /* bitfill_aligned() assumes that it's at least 8x8 */ if (var->xres < 8 || var->yres < 8) return -EINVAL; /* Too huge resolution causes multiplication overflow. */ if (check_mul_overflow(var->xres, var->yres, &unused) || check_mul_overflow(var->xres_virtual, var->yres_virtual, &unused)) return -EINVAL; ret = info->fbops->fb_check_var(var, info); if (ret) return ret; /* verify that virtual resolution >= physical resolution */ if (var->xres_virtual < var->xres || var->yres_virtual < var->yres) { pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", info->fix.id, var->xres_virtual, var->yres_virtual, var->xres, var->yres); return -EINVAL; } if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; if (info->fbops->fb_get_caps) { ret = fb_check_caps(info, var, activate); if (ret) return ret; } old_var = info->var; info->var = *var; if (info->fbops->fb_set_par) { ret = info->fbops->fb_set_par(info); if (ret) { info->var = old_var; printk(KERN_WARNING "detected " "fb_set_par error, " "error code: %d\n", ret); return ret; } } fb_pan_display(info, &info->var); fb_set_cmap(&info->cmap, info); fb_var_to_videomode(&mode, &info->var); if (info->modelist.prev && info->modelist.next && !list_empty(&info->modelist)) ret = fb_add_videomode(&mode, &info->modelist); if (ret) return ret; event.info = info; event.data = &mode; fb_notifier_call_chain(FB_EVENT_MODE_CHANGE, &event); return 0; } EXPORT_SYMBOL(fb_set_var); int fb_blank(struct fb_info *info, int blank) { struct fb_event event; int ret = -EINVAL; if (blank > FB_BLANK_POWERDOWN) blank = FB_BLANK_POWERDOWN; event.info = info; event.data = ␣ if (info->fbops->fb_blank) ret = info->fbops->fb_blank(blank, info); if (!ret) fb_notifier_call_chain(FB_EVENT_BLANK, &event); return ret; } EXPORT_SYMBOL(fb_blank); static int fb_check_foreignness(struct fb_info *fi) { const bool foreign_endian = fi->flags & FBINFO_FOREIGN_ENDIAN; fi->flags &= ~FBINFO_FOREIGN_ENDIAN; #ifdef __BIG_ENDIAN fi->flags |= foreign_endian ? 0 : FBINFO_BE_MATH; #else fi->flags |= foreign_endian ? FBINFO_BE_MATH : 0; #endif /* __BIG_ENDIAN */ if (fi->flags & FBINFO_BE_MATH && !fb_be_math(fi)) { pr_err("%s: enable CONFIG_FB_BIG_ENDIAN to " "support this framebuffer\n", fi->fix.id); return -ENOSYS; } else if (!(fi->flags & FBINFO_BE_MATH) && fb_be_math(fi)) { pr_err("%s: enable CONFIG_FB_LITTLE_ENDIAN to " "support this framebuffer\n", fi->fix.id); return -ENOSYS; } return 0; } static int do_register_framebuffer(struct fb_info *fb_info) { int i; struct fb_videomode mode; if (fb_check_foreignness(fb_info)) return -ENOSYS; if (num_registered_fb == FB_MAX) return -ENXIO; num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; refcount_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); fb_device_create(fb_info); if (fb_info->pixmap.addr == NULL) { fb_info->pixmap.addr = kmalloc(FBPIXMAPSIZE, GFP_KERNEL); if (fb_info->pixmap.addr) { fb_info->pixmap.size = FBPIXMAPSIZE; fb_info->pixmap.buf_align = 1; fb_info->pixmap.scan_align = 1; fb_info->pixmap.access_align = 32; fb_info->pixmap.flags = FB_PIXMAP_DEFAULT; } } fb_info->pixmap.offset = 0; if (!fb_info->pixmap.blit_x) fb_info->pixmap.blit_x = ~(u32)0; if (!fb_info->pixmap.blit_y) fb_info->pixmap.blit_y = ~(u32)0; if (!fb_info->modelist.prev || !fb_info->modelist.next) INIT_LIST_HEAD(&fb_info->modelist); if (fb_info->skip_vt_switch) pm_vt_switch_required(fb_info->device, false); else pm_vt_switch_required(fb_info->device, true); fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; #ifdef CONFIG_GUMSTIX_AM200EPD { struct fb_event event; event.info = fb_info; fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); } #endif return fbcon_fb_registered(fb_info); } static void unbind_console(struct fb_info *fb_info) { int i = fb_info->node; if (WARN_ON(i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)) return; fbcon_fb_unbind(fb_info); } static void unlink_framebuffer(struct fb_info *fb_info) { int i; i = fb_info->node; if (WARN_ON(i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)) return; fb_device_destroy(fb_info); pm_vt_switch_unregister(fb_info->device); unbind_console(fb_info); } static void do_unregister_framebuffer(struct fb_info *fb_info) { unlink_framebuffer(fb_info); if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) { kfree(fb_info->pixmap.addr); fb_info->pixmap.addr = NULL; } fb_destroy_modelist(&fb_info->modelist); registered_fb[fb_info->node] = NULL; num_registered_fb--; #ifdef CONFIG_GUMSTIX_AM200EPD { struct fb_event event; event.info = fb_info; fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); } #endif fbcon_fb_unregistered(fb_info); /* this may free fb info */ put_fb_info(fb_info); } /** * register_framebuffer - registers a frame buffer device * @fb_info: frame buffer info structure * * Registers a frame buffer device @fb_info. * * Returns negative errno on error, or zero for success. * */ int register_framebuffer(struct fb_info *fb_info) { int ret; mutex_lock(®istration_lock); ret = do_register_framebuffer(fb_info); mutex_unlock(®istration_lock); return ret; } EXPORT_SYMBOL(register_framebuffer); /** * unregister_framebuffer - releases a frame buffer device * @fb_info: frame buffer info structure * * Unregisters a frame buffer device @fb_info. * * Returns negative errno on error, or zero for success. * * This function will also notify the framebuffer console * to release the driver. * * This is meant to be called within a driver's module_exit() * function. If this is called outside module_exit(), ensure * that the driver implements fb_open() and fb_release() to * check that no processes are using the device. */ void unregister_framebuffer(struct fb_info *fb_info) { mutex_lock(®istration_lock); do_unregister_framebuffer(fb_info); mutex_unlock(®istration_lock); } EXPORT_SYMBOL(unregister_framebuffer); /** * fb_set_suspend - low level driver signals suspend * @info: framebuffer affected * @state: 0 = resuming, !=0 = suspending * * This is meant to be used by low level drivers to * signal suspend/resume to the core & clients. * It must be called with the console semaphore held */ void fb_set_suspend(struct fb_info *info, int state) { WARN_CONSOLE_UNLOCKED(); if (state) { fbcon_suspended(info); info->state = FBINFO_STATE_SUSPENDED; } else { info->state = FBINFO_STATE_RUNNING; fbcon_resumed(info); } } EXPORT_SYMBOL(fb_set_suspend); static int __init fbmem_init(void) { int ret; fb_class = class_create("graphics"); if (IS_ERR(fb_class)) { ret = PTR_ERR(fb_class); pr_err("Unable to create fb class; errno = %d\n", ret); goto err_fb_class; } ret = fb_init_procfs(); if (ret) goto err_class_destroy; ret = fb_register_chrdev(); if (ret) goto err_fb_cleanup_procfs; fb_console_init(); return 0; err_fb_cleanup_procfs: fb_cleanup_procfs(); err_class_destroy: class_destroy(fb_class); err_fb_class: fb_class = NULL; return ret; } #ifdef MODULE static void __exit fbmem_exit(void) { fb_console_exit(); fb_unregister_chrdev(); fb_cleanup_procfs(); class_destroy(fb_class); } module_init(fbmem_init); module_exit(fbmem_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Framebuffer base"); #else subsys_initcall(fbmem_init); #endif int fb_new_modelist(struct fb_info *info) { struct fb_var_screeninfo var = info->var; struct list_head *pos, *n; struct fb_modelist *modelist; struct fb_videomode *m, mode; int err; list_for_each_safe(pos, n, &info->modelist) { modelist = list_entry(pos, struct fb_modelist, list); m = &modelist->mode; fb_videomode_to_var(&var, m); var.activate = FB_ACTIVATE_TEST; err = fb_set_var(info, &var); fb_var_to_videomode(&mode, &var); if (err || !fb_mode_is_equal(m, &mode)) { list_del(pos); kfree(pos); } } if (list_empty(&info->modelist)) return 1; fbcon_new_modelist(info); return 0; } #if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname) { bool fwonly = video_firmware_drivers_only(); if (fwonly) pr_warn("Driver %s not loading because of nomodeset parameter\n", drvname); return fwonly; } EXPORT_SYMBOL(fb_modesetting_disabled); #endif MODULE_LICENSE("GPL"); |
12 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/icmpv6.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <net/ipv6.h> #if IS_ENABLED(CONFIG_IPV6) #if !IS_BUILTIN(CONFIG_IPV6) static ip6_icmp_send_t __rcu *ip6_icmp_send; int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? 0 : -EBUSY; } EXPORT_SYMBOL(inet6_register_icmp_sender); int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) { int ret; ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? 0 : -EINVAL; synchronize_net(); return ret; } EXPORT_SYMBOL(inet6_unregister_icmp_sender); void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct inet6_skb_parm *parm) { ip6_icmp_send_t *send; rcu_read_lock(); send = rcu_dereference(ip6_icmp_send); if (send) send(skb, type, code, info, NULL, parm); rcu_read_unlock(); } EXPORT_SYMBOL(__icmpv6_send); #endif #if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_conntrack.h> void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { struct inet6_skb_parm parm = { 0 }; struct sk_buff *cloned_skb = NULL; enum ip_conntrack_info ctinfo; struct in6_addr orig_ip; struct nf_conn *ct; ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(ct->status & IPS_SRC_NAT)) { __icmpv6_send(skb_in, type, code, info, &parm); return; } if (skb_shared(skb_in)) skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) goto out; orig_ip = ipv6_hdr(skb_in)->saddr; ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; __icmpv6_send(skb_in, type, code, info, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); } EXPORT_SYMBOL(icmpv6_ndo_send); #endif #endif |
79 77 71 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 | // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2002 Intel Corp. * * This file is part of the SCTP kernel implementation * * Sysctl related interfaces for SCTP. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Mingqin Liu <liuming@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Ryan Layer <rmlayer@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <linux/sysctl.h> static int timer_max = 86400000; /* ms in one day */ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; static int rwnd_scale_max = 16; static int rto_alpha_min = 0; static int rto_beta_min = 0; static int rto_alpha_max = 1000; static int rto_beta_max = 1000; static int pf_expose_max = SCTP_PF_EXPOSE_MAX; static int ps_retrans_max = SCTP_PS_RETRANS_MAX; static int udp_port_max = 65535; static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, .proc_handler = proc_doulongvec_minmax }, { .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, .proc_handler = proc_dointvec, }, { /* sentinel */ } }; /* The following index defines are used in sctp_sysctl_net_register(). * If you add new items to the sctp_net_table, please ensure that * the index values of these defines hold the same meaning indicated by * their macro names when they appear in sctp_net_table. */ #define SCTP_RTO_MIN_IDX 0 #define SCTP_RTO_MAX_IDX 1 #define SCTP_PF_RETRANS_IDX 2 #define SCTP_PS_RETRANS_IDX 3 static struct ctl_table sctp_net_table[] = { [SCTP_RTO_MIN_IDX] = { .procname = "rto_min", .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_min, .extra1 = SYSCTL_ONE, .extra2 = &init_net.sctp.rto_max }, [SCTP_RTO_MAX_IDX] = { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_max, .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, [SCTP_PF_RETRANS_IDX] = { .procname = "pf_retrans", .data = &init_net.sctp.pf_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &init_net.sctp.ps_retrans, }, [SCTP_PS_RETRANS_IDX] = { .procname = "ps_retrans", .data = &init_net.sctp.ps_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &init_net.sctp.pf_retrans, .extra2 = &ps_retrans_max, }, { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "rto_alpha_exp_divisor", .data = &init_net.sctp.rto_alpha, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_alpha_min, .extra2 = &rto_alpha_max, }, { .procname = "rto_beta_exp_divisor", .data = &init_net.sctp.rto_beta, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_beta_min, .extra2 = &rto_beta_max, }, { .procname = "max_burst", .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "cookie_preserve_enable", .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "cookie_hmac_alg", .data = &init_net.sctp.sctp_hmac_alg, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, }, { .procname = "valid_cookie_life", .data = &init_net.sctp.valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "sack_timeout", .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { .procname = "hb_interval", .data = &init_net.sctp.hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "association_max_retrans", .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "path_max_retrans", .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "max_init_retransmits", .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "sndbuf_policy", .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "rcvbuf_policy", .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_enable", .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_noauth_enable", .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "prsctp_enable", .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "reconf_enable", .data = &init_net.sctp.reconf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_auth, }, { .procname = "intl_enable", .data = &init_net.sctp.intl_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ecn_enable", .data = &init_net.sctp.ecn_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "plpmtud_probe_interval", .data = &init_net.sctp.probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_probe_interval, }, { .procname = "udp_port", .data = &init_net.sctp.udp_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_udp_port, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "encap_port", .data = &init_net.sctp.encap_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &addr_scope_max, }, { .procname = "rwnd_update_shift", .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &rwnd_scale_max, }, { .procname = "max_autoclose", .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, #ifdef CONFIG_NET_L3_MASTER_DEV { .procname = "l3mdev_accept", .data = &init_net.sctp.l3mdev_accept, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif { .procname = "pf_enable", .data = &init_net.sctp.pf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "pf_expose", .data = &init_net.sctp.pf_expose, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &pf_expose_max, }, { /* sentinel */ } }; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; bool changed = false; char *none = "none"; char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? : none; tbl.maxlen = strlen(tbl.data); } ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; changed = true; } if (!changed) ret = -EINVAL; } return ret; } static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_min; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_min = new_value; } return ret; } static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_max; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_max = new_value; } return ret; } static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write) pr_warn_once("Changing rto_alpha or rto_beta may lead to " "suboptimal rtt/srtt estimations!\n"); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); } static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; int new_value, ret; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.auth_enable; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; net->sctp.auth_enable = new_value; /* Update the value in the control socket */ lock_sock(sk); sctp_sk(sk)->ep->auth_enable = new_value; release_sock(sk); } return ret; } static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *)ctl->extra1; unsigned int max = *(unsigned int *)ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.udp_port; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; if (new_value > max || new_value < min) return -EINVAL; net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { ret = sctp_udp_sock_start(net); if (ret) net->sctp.udp_port = 0; } /* Update the value in the control socket */ lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); } return ret; } static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.probe_interval; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value && new_value < SCTP_PROBE_TIMER_MIN) return -EINVAL; net->sctp.probe_interval = new_value; } return ret; } int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; int i; table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); if (!table) return -ENOMEM; for (i = 0; table[i].data; i++) table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min; table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans; table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp", table, ARRAY_SIZE(sctp_net_table)); if (net->sctp.sysctl_header == NULL) { kfree(table); return -ENOMEM; } return 0; } void sctp_sysctl_net_unregister(struct net *net) { struct ctl_table *table; table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); kfree(table); } static struct ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) { sctp_sysctl_header = register_net_sysctl(&init_net, "net/sctp", sctp_table); } /* Sysctl deregistration. */ void sctp_sysctl_unregister(void) { unregister_net_sysctl_table(sctp_sysctl_header); } |
3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | // SPDX-License-Identifier: GPL-2.0 #include <linux/cpufreq.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> extern const struct seq_operations cpuinfo_op; static int cpuinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &cpuinfo_op); } static const struct proc_ops cpuinfo_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = cpuinfo_open, .proc_read_iter = seq_read_iter, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static int __init proc_cpuinfo_init(void) { proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops); return 0; } fs_initcall(proc_cpuinfo_init); |
97 5 249 315 64 64 116 102 64 64 64 64 175 175 175 175 141 116 143 102 175 175 15 175 175 53 64 64 64 64 64 15 64 93 64 64 64 64 64 64 64 64 64 64 15 64 64 64 20 64 1 1 15 15 15 15 15 1 1 1 1 1 1 1 73 73 73 73 73 165 165 165 13 160 238 201 232 114 73 231 231 238 237 238 238 238 238 64 64 238 64 232 1 1 231 73 232 76 32 52 1 93 49 260 259 147 257 257 255 119 22 41 100 4 1 4 156 94 68 49 49 49 93 94 94 157 94 2 103 88 104 1 104 5 106 4 2 2 313 2 311 136 137 9 95 2 139 4 135 93 9 107 93 104 154 2 1419 1424 1422 1833 1836 52 1832 1834 1155 1706 1707 552 392 358 573 336 577 24 24 68 1441 1436 2 1435 1435 15 1421 1422 1417 84 84 58 74 81 81 15 5 108 120 100 2 88 44 90 39 1 89 1 87 2 84 2 83 5 84 84 58 77 47 47 47 47 47 42 43 43 43 44 44 44 31 31 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 118 118 113 118 118 118 117 118 118 117 117 117 11 117 117 117 118 35 116 1 36 1 28 9 22 24 14 10 9 2 5 25 37 37 36 37 4 1 3 10 10 9 10 10 10 9 10 9 9 10 1 3 2 2 2 2 2 2 2 2 2 2 2 2 5 10 2 2 2 2 2 2 2 2 2 2 8 8 8 8 8 8 8 8 8 3 1 3 8 8 8 1 6 7 7 8 8 8 8 7 7 7 7 7 7 4 1 1 1 9 9 5 8 9 5 9 9 9 9 9 8 9 9 9 5 6 8 7 1 503 500 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet * & Swedish University of Agricultural Sciences. * * Jens Laas <jens.laas@data.slu.se> Swedish University of * Agricultural Sciences. * * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet * * This work is based on the LPC-trie which is originally described in: * * An experimental study of compression methods for dynamic tries * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. * https://www.csc.kth.se/~snilsson/software/dyntrie2/ * * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * * Code from fib_hash has been reused which includes the following header: * * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 FIB: lookup engine and maintenance routines. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Substantial contributions to this work comes from: * * David S. Miller, <davem@davemloft.net> * Stephen Hemminger <shemminger@osdl.org> * Paul E. McKenney <paulmck@us.ibm.com> * Patrick McHardy <kaber@trash.net> */ #include <linux/cache.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/rcupdate.h> #include <linux/rcupdate_wait.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/notifier.h> #include <net/net_namespace.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/fib_notifier.h> #include <trace/events/fib.h> #include "fib_lookup.h" static int call_fib_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_alias *fa, struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; return call_fib4_notifier(nb, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_alias *fa, struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; return call_fib4_notifiers(net, event_type, &info.info); } #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) #define KEY_MAX ((t_key)~0) typedef unsigned int t_key; #define IS_TRIE(n) ((n)->pos >= KEYLENGTH) #define IS_TNODE(n) ((n)->bits) #define IS_LEAF(n) (!(n)->bits) struct key_vector { t_key key; unsigned char pos; /* 2log(KEYLENGTH) bits needed */ unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char slen; union { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode); }; }; struct tnode { struct rcu_head rcu; t_key empty_children; /* KEYLENGTH bits needed */ t_key full_children; /* KEYLENGTH bits needed */ struct key_vector __rcu *parent; struct key_vector kv[1]; #define tn_bits kv[0].bits }; #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) #define LEAF_SIZE TNODE_SIZE(1) #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; unsigned int backtrack; unsigned int semantic_match_passed; unsigned int semantic_match_miss; unsigned int null_node_hit; unsigned int resize_node_skipped; }; #endif struct trie_stat { unsigned int totdepth; unsigned int maxdepth; unsigned int tnodes; unsigned int leaves; unsigned int nullpointers; unsigned int prefixes; unsigned int nodesizes[MAX_STAT_DEPTH]; }; struct trie { struct key_vector kv[1]; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats; #endif }; static struct key_vector *resize(struct trie *t, struct key_vector *tn); static unsigned int tnode_free_size; /* * synchronize_rcu after call_rcu for outstanding dirty memory; it should be * especially useful before resizing the root node with PREEMPT_NONE configs; * the value was obtained experimentally, aiming to avoid visible slowdown. */ unsigned int sysctl_fib_sync_mem = 512 * 1024; unsigned int sysctl_fib_sync_mem_min = 64 * 1024; unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024; static struct kmem_cache *fn_alias_kmem __ro_after_init; static struct kmem_cache *trie_leaf_kmem __ro_after_init; static inline struct tnode *tn_info(struct key_vector *kv) { return container_of(kv, struct tnode, kv[0]); } /* caller must hold RTNL */ #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent) #define get_child(tn, i) rtnl_dereference((tn)->tnode[i]) /* caller must hold RCU read lock or RTNL */ #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent) #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i]) /* wrapper for rcu_assign_pointer */ static inline void node_set_parent(struct key_vector *n, struct key_vector *tp) { if (n) rcu_assign_pointer(tn_info(n)->parent, tp); } #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p) /* This provides us with the number of children in this node, in the case of a * leaf this will return 0 meaning none of the children are accessible. */ static inline unsigned long child_length(const struct key_vector *tn) { return (1ul << tn->bits) & ~(1ul); } #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos) static inline unsigned long get_index(t_key key, struct key_vector *kv) { unsigned long index = key ^ kv->key; if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos)) return 0; return index >> kv->pos; } /* To understand this stuff, an understanding of keys and all their bits is * necessary. Every node in the trie has a key associated with it, but not * all of the bits in that key are significant. * * Consider a node 'n' and its parent 'tp'. * * If n is a leaf, every bit in its key is significant. Its presence is * necessitated by path compression, since during a tree traversal (when * searching for a leaf - unless we are doing an insertion) we will completely * ignore all skipped bits we encounter. Thus we need to verify, at the end of * a potentially successful search, that we have indeed been walking the * correct key path. * * Note that we can never "miss" the correct key in the tree if present by * following the wrong path. Path compression ensures that segments of the key * that are the same for all keys with a given prefix are skipped, but the * skipped part *is* identical for each node in the subtrie below the skipped * bit! trie_insert() in this implementation takes care of that. * * if n is an internal node - a 'tnode' here, the various parts of its key * have many different meanings. * * Example: * _________________________________________________________________ * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | * ----------------------------------------------------------------- * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 * * _________________________________________________________________ * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | * ----------------------------------------------------------------- * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 * * tp->pos = 22 * tp->bits = 3 * n->pos = 13 * n->bits = 4 * * First, let's just ignore the bits that come before the parent tp, that is * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this * point we do not use them for anything. * * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the * index into the parent's child array. That is, they will be used to find * 'n' among tp's children. * * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits * for the node n. * * All the bits we have seen so far are significant to the node n. The rest * of the bits are really not needed or indeed known in n->key. * * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into * n's child array, and will of course be different for each child. * * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown * at this point. */ static const int halve_threshold = 25; static const int inflate_threshold = 50; static const int halve_threshold_root = 15; static const int inflate_threshold_root = 30; static void __alias_free_mem(struct rcu_head *head) { struct fib_alias *fa = container_of(head, struct fib_alias, rcu); kmem_cache_free(fn_alias_kmem, fa); } static inline void alias_free_mem_rcu(struct fib_alias *fa) { call_rcu(&fa->rcu, __alias_free_mem); } #define TNODE_VMALLOC_MAX \ ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) static void __node_free_rcu(struct rcu_head *head) { struct tnode *n = container_of(head, struct tnode, rcu); if (!n->tn_bits) kmem_cache_free(trie_leaf_kmem, n); else kvfree(n); } #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) static struct tnode *tnode_alloc(int bits) { size_t size; /* verify bits is within bounds */ if (bits > TNODE_VMALLOC_MAX) return NULL; /* determine size and verify it is non-zero and didn't overflow */ size = TNODE_SIZE(1ul << bits); if (size <= PAGE_SIZE) return kzalloc(size, GFP_KERNEL); else return vzalloc(size); } static inline void empty_child_inc(struct key_vector *n) { tn_info(n)->empty_children++; if (!tn_info(n)->empty_children) tn_info(n)->full_children++; } static inline void empty_child_dec(struct key_vector *n) { if (!tn_info(n)->empty_children) tn_info(n)->full_children--; tn_info(n)->empty_children--; } static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) { struct key_vector *l; struct tnode *kv; kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (!kv) return NULL; /* initialize key vector */ l = kv->kv; l->key = key; l->pos = 0; l->bits = 0; l->slen = fa->fa_slen; /* link leaf to fib alias */ INIT_HLIST_HEAD(&l->leaf); hlist_add_head(&fa->fa_list, &l->leaf); return l; } static struct key_vector *tnode_new(t_key key, int pos, int bits) { unsigned int shift = pos + bits; struct key_vector *tn; struct tnode *tnode; /* verify bits and pos their msb bits clear and values are valid */ BUG_ON(!bits || (shift > KEYLENGTH)); tnode = tnode_alloc(bits); if (!tnode) return NULL; pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0), sizeof(struct key_vector *) << bits); if (bits == KEYLENGTH) tnode->full_children = 1; else tnode->empty_children = 1ul << bits; tn = tnode->kv; tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; tn->pos = pos; tn->bits = bits; tn->slen = pos; return tn; } /* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ static inline int tnode_full(struct key_vector *tn, struct key_vector *n) { return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } /* Add a child at position i overwriting the old value. * Update the value of full_children and empty_children. */ static void put_child(struct key_vector *tn, unsigned long i, struct key_vector *n) { struct key_vector *chi = get_child(tn, i); int isfull, wasfull; BUG_ON(i >= child_length(tn)); /* update emptyChildren, overflow into fullChildren */ if (!n && chi) empty_child_inc(tn); if (n && !chi) empty_child_dec(tn); /* update fullChildren */ wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); if (wasfull && !isfull) tn_info(tn)->full_children--; else if (!wasfull && isfull) tn_info(tn)->full_children++; if (n && (tn->slen < n->slen)) tn->slen = n->slen; rcu_assign_pointer(tn->tnode[i], n); } static void update_children(struct key_vector *tn) { unsigned long i; /* update all of the child parent pointers */ for (i = child_length(tn); i;) { struct key_vector *inode = get_child(tn, --i); if (!inode) continue; /* Either update the children of a tnode that * already belongs to us or update the child * to point to ourselves. */ if (node_parent(inode) == tn) update_children(inode); else node_set_parent(inode, tn); } } static inline void put_child_root(struct key_vector *tp, t_key key, struct key_vector *n) { if (IS_TRIE(tp)) rcu_assign_pointer(tp->tnode[0], n); else put_child(tp, get_index(key, tp), n); } static inline void tnode_free_init(struct key_vector *tn) { tn_info(tn)->rcu.next = NULL; } static inline void tnode_free_append(struct key_vector *tn, struct key_vector *n) { tn_info(n)->rcu.next = tn_info(tn)->rcu.next; tn_info(tn)->rcu.next = &tn_info(n)->rcu; } static void tnode_free(struct key_vector *tn) { struct callback_head *head = &tn_info(tn)->rcu; while (head) { head = head->next; tnode_free_size += TNODE_SIZE(1ul << tn->bits); node_free(tn); tn = container_of(head, struct tnode, rcu)->kv; } if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } } static struct key_vector *replace(struct trie *t, struct key_vector *oldtnode, struct key_vector *tn) { struct key_vector *tp = node_parent(oldtnode); unsigned long i; /* setup the parent pointer out of and back into this node */ NODE_INIT_PARENT(tn, tp); put_child_root(tp, tn->key, tn); /* update all of the child parent pointers */ update_children(tn); /* all pointers should be clean so we are done */ tnode_free(oldtnode); /* resize children now that oldtnode is freed */ for (i = child_length(tn); i;) { struct key_vector *inode = get_child(tn, --i); /* resize child node */ if (tnode_full(tn, inode)) tn = resize(t, inode); } return tp; } static struct key_vector *inflate(struct trie *t, struct key_vector *oldtnode) { struct key_vector *tn; unsigned long i; t_key m; pr_debug("In inflate\n"); tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = child_length(oldtnode), m = 1u << tn->pos; i;) { struct key_vector *inode = get_child(oldtnode, --i); struct key_vector *node0, *node1; unsigned long j, k; /* An empty child */ if (!inode) continue; /* A leaf or an internal node with skipped bits */ if (!tnode_full(oldtnode, inode)) { put_child(tn, get_index(inode->key, tn), inode); continue; } /* drop the node in the old tnode free list */ tnode_free_append(oldtnode, inode); /* An internal node with two children */ if (inode->bits == 1) { put_child(tn, 2 * i + 1, get_child(inode, 1)); put_child(tn, 2 * i, get_child(inode, 0)); continue; } /* We will replace this node 'inode' with two new * ones, 'node0' and 'node1', each with half of the * original children. The two new nodes will have * a position one bit further down the key and this * means that the "significant" part of their keys * (see the discussion near the top of this file) * will differ by one bit, which will be "0" in * node0's key and "1" in node1's key. Since we are * moving the key position by one step, the bit that * we are moving away from - the bit at position * (tn->pos) - is the one that will differ between * node0 and node1. So... we synthesize that bit in the * two new keys. */ node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); if (!node1) goto nomem; node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); tnode_free_append(tn, node1); if (!node0) goto nomem; tnode_free_append(tn, node0); /* populate child pointers in new nodes */ for (k = child_length(inode), j = k / 2; j;) { put_child(node1, --j, get_child(inode, --k)); put_child(node0, j, get_child(inode, j)); put_child(node1, --j, get_child(inode, --k)); put_child(node0, j, get_child(inode, j)); } /* link new nodes to parent */ NODE_INIT_PARENT(node1, tn); NODE_INIT_PARENT(node0, tn); /* link parent to nodes */ put_child(tn, 2 * i + 1, node1); put_child(tn, 2 * i, node0); } /* setup the parent pointers into and out of this node */ return replace(t, oldtnode, tn); nomem: /* all pointers should be clean so we are done */ tnode_free(tn); notnode: return NULL; } static struct key_vector *halve(struct trie *t, struct key_vector *oldtnode) { struct key_vector *tn; unsigned long i; pr_debug("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); if (!tn) goto notnode; /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = child_length(oldtnode); i;) { struct key_vector *node1 = get_child(oldtnode, --i); struct key_vector *node0 = get_child(oldtnode, --i); struct key_vector *inode; /* At least one of the children is empty */ if (!node1 || !node0) { put_child(tn, i / 2, node1 ? : node0); continue; } /* Two nonempty children */ inode = tnode_new(node0->key, oldtnode->pos, 1); if (!inode) goto nomem; tnode_free_append(tn, inode); /* initialize pointers out of node */ put_child(inode, 1, node1); put_child(inode, 0, node0); NODE_INIT_PARENT(inode, tn); /* link parent to node */ put_child(tn, i / 2, inode); } /* setup the parent pointers into and out of this node */ return replace(t, oldtnode, tn); nomem: /* all pointers should be clean so we are done */ tnode_free(tn); notnode: return NULL; } static struct key_vector *collapse(struct trie *t, struct key_vector *oldtnode) { struct key_vector *n, *tp; unsigned long i; /* scan the tnode looking for that one child that might still exist */ for (n = NULL, i = child_length(oldtnode); !n && i;) n = get_child(oldtnode, --i); /* compress one level */ tp = node_parent(oldtnode); put_child_root(tp, oldtnode->key, n); node_set_parent(n, tp); /* drop dead node */ node_free(oldtnode); return tp; } static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; unsigned char slen_max; /* only vector 0 can have a suffix length greater than or equal to * tn->pos + tn->bits, the second highest node will have a suffix * length at most of tn->pos + tn->bits - 1 */ slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); /* search though the list of children looking for nodes that might * have a suffix greater than the one we currently have. This is * why we start with a stride of 2 since a stride of 1 would * represent the nodes with suffix length equal to tn->pos */ for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) { struct key_vector *n = get_child(tn, i); if (!n || (n->slen <= slen)) continue; /* update stride and slen based on new value */ stride <<= (n->slen - slen); slen = n->slen; i &= ~(stride - 1); /* stop searching if we have hit the maximum possible value */ if (slen >= slen_max) break; } tn->slen = slen; return slen; } /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of * the Helsinki University of Technology and Matti Tikkanen of Nokia * Telecommunications, page 6: * "A node is doubled if the ratio of non-empty children to all * children in the *doubled* node is at least 'high'." * * 'high' in this instance is the variable 'inflate_threshold'. It * is expressed as a percentage, so we multiply it with * child_length() and instead of multiplying by 2 (since the * child array will be doubled by inflate()) and multiplying * the left-hand side by 100 (to handle the percentage thing) we * multiply the left-hand side by 50. * * The left-hand side may look a bit weird: child_length(tn) * - tn->empty_children is of course the number of non-null children * in the current node. tn->full_children is the number of "full" * children, that is non-null tnodes with a skip value of 0. * All of those will be doubled in the resulting inflated tnode, so * we just count them one extra time here. * * A clearer way to write this would be: * * to_be_doubled = tn->full_children; * not_to_be_doubled = child_length(tn) - tn->empty_children - * tn->full_children; * * new_child_length = child_length(tn) * 2; * * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / * new_child_length; * if (new_fill_factor >= inflate_threshold) * * ...and so on, tho it would mess up the while () loop. * * anyway, * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= * inflate_threshold * * avoid a division: * 100 * (not_to_be_doubled + 2*to_be_doubled) >= * inflate_threshold * new_child_length * * expand not_to_be_doubled and to_be_doubled, and shorten: * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= inflate_threshold * new_child_length * * expand new_child_length: * 100 * (child_length(tn) - tn->empty_children + * tn->full_children) >= * inflate_threshold * child_length(tn) * 2 * * shorten again: * 50 * (tn->full_children + child_length(tn) - * tn->empty_children) >= inflate_threshold * * child_length(tn) * */ static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn) { unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold; used -= tn_info(tn)->empty_children; used += tn_info(tn)->full_children; /* if bits == KEYLENGTH then pos = 0, and will fail below */ return (used > 1) && tn->pos && ((50 * used) >= threshold); } static inline bool should_halve(struct key_vector *tp, struct key_vector *tn) { unsigned long used = child_length(tn); unsigned long threshold = used; /* Keep root node larger */ threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold; used -= tn_info(tn)->empty_children; /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); } static inline bool should_collapse(struct key_vector *tn) { unsigned long used = child_length(tn); used -= tn_info(tn)->empty_children; /* account for bits == KEYLENGTH case */ if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children) used -= KEY_MAX; /* One child or none, time to drop us from the trie */ return used < 2; } #define MAX_WORK 10 static struct key_vector *resize(struct trie *t, struct key_vector *tn) { #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats = t->stats; #endif struct key_vector *tp = node_parent(tn); unsigned long cindex = get_index(tn->key, tp); int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", tn, inflate_threshold, halve_threshold); /* track the tnode via the pointer from the parent instead of * doing it ourselves. This way we can let RCU fully do its * thing without us interfering */ BUG_ON(tn != get_child(tp, cindex)); /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ while (should_inflate(tp, tn) && max_work) { tp = inflate(t, tn); if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; tn = get_child(tp, cindex); } /* update parent in case inflate failed */ tp = node_parent(tn); /* Return if at least one inflate is run */ if (max_work != MAX_WORK) return tp; /* Halve as long as the number of empty children in this * node is above threshold. */ while (should_halve(tp, tn) && max_work) { tp = halve(t, tn); if (!tp) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->resize_node_skipped); #endif break; } max_work--; tn = get_child(tp, cindex); } /* Only one child remains */ if (should_collapse(tn)) return collapse(t, tn); /* update parent in case halve failed */ return node_parent(tn); } static void node_pull_suffix(struct key_vector *tn, unsigned char slen) { unsigned char node_slen = tn->slen; while ((node_slen > tn->pos) && (node_slen > slen)) { slen = update_suffix(tn); if (node_slen == slen) break; tn = node_parent(tn); node_slen = tn->slen; } } static void node_push_suffix(struct key_vector *tn, unsigned char slen) { while (tn->slen < slen) { tn->slen = slen; tn = node_parent(tn); } } /* rcu_read_lock needs to be hold by caller from readside */ static struct key_vector *fib_find_node(struct trie *t, struct key_vector **tp, u32 key) { struct key_vector *pn, *n = t->kv; unsigned long index = 0; do { pn = n; n = get_child_rcu(n, index); if (!n) break; index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex * * This check is safe even if bits == KEYLENGTH due to the * fact that we can only allocate a node with 32 bits if a * long is greater than 32 bits. */ if (index >= (1ul << n->bits)) { n = NULL; break; } /* keep searching until we find a perfect match leaf or NULL */ } while (IS_TNODE(n)); *tp = pn; return n; } /* Return the first fib alias matching DSCP with * priority less than or equal to PRIO. * If 'find_first' is set, return the first matching * fib alias, regardless of DSCP and priority. */ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, dscp_t dscp, u32 prio, u32 tb_id, bool find_first) { struct fib_alias *fa; if (!fah) return NULL; hlist_for_each_entry(fa, fah, fa_list) { /* Avoid Sparse warning when using dscp_t in inequalities */ u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp); u8 __dscp = inet_dscp_to_dsfield(dscp); if (fa->fa_slen < slen) continue; if (fa->fa_slen != slen) break; if (fa->tb_id > tb_id) continue; if (fa->tb_id != tb_id) break; if (find_first) return fa; if (__fa_dscp > __dscp) continue; if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp) return fa; } return NULL; } static struct fib_alias * fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) { u8 slen = KEYLENGTH - fri->dst_len; struct key_vector *l, *tp; struct fib_table *tb; struct fib_alias *fa; struct trie *t; tb = fib_get_table(net, fri->tb_id); if (!tb) return NULL; t = (struct trie *)tb->tb_data; l = fib_find_node(t, &tp, be32_to_cpu(fri->dst)); if (!l) return NULL; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (fa->fa_slen == slen && fa->tb_id == fri->tb_id && fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi && fa->fa_type == fri->type) return fa; } return NULL; } void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { u8 fib_notify_on_flag_change; struct fib_alias *fa_match; struct sk_buff *skb; int err; rcu_read_lock(); fa_match = fib_find_matching_alias(net, fri); if (!fa_match) goto out; /* These are paired with the WRITE_ONCE() happening in this function. * The reason is that we are only protected by RCU at this point. */ if (READ_ONCE(fa_match->offload) == fri->offload && READ_ONCE(fa_match->trap) == fri->trap && READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; WRITE_ONCE(fa_match->offload, fri->offload); WRITE_ONCE(fa_match->trap, fri->trap); fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change); /* 2 means send notifications only if offload_failed was changed. */ if (fib_notify_on_flag_change == 2 && READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); if (!fib_notify_on_flag_change) goto out; skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); if (!skb) { err = -ENOBUFS; goto errout; } err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC); goto out; errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err); out: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set); static void trie_rebalance(struct trie *t, struct key_vector *tn) { while (!IS_TRIE(tn)) tn = resize(t, tn); } static int fib_insert_node(struct trie *t, struct key_vector *tp, struct fib_alias *new, t_key key) { struct key_vector *n, *l; l = leaf_new(key, new); if (!l) goto noleaf; /* retrieve child from parent node */ n = get_child(tp, get_index(key, tp)); /* Case 2: n is a LEAF or a TNODE and the key doesn't match. * * Add a new tnode here * first tnode need some special handling * leaves us in position for handling as case 3 */ if (n) { struct key_vector *tn; tn = tnode_new(key, __fls(key ^ n->key), 1); if (!tn) goto notnode; /* initialize routes out of node */ NODE_INIT_PARENT(tn, tp); put_child(tn, get_index(key, tn) ^ 1, n); /* start adding routes into the node */ put_child_root(tp, key, tn); node_set_parent(n, tn); /* parent now has a NULL spot where the leaf can go */ tp = tn; } /* Case 3: n is NULL, and will just insert a new leaf */ node_push_suffix(tp, new->fa_slen); NODE_INIT_PARENT(l, tp); put_child_root(tp, key, l); trie_rebalance(t, tp); return 0; notnode: node_free(l); noleaf: return -ENOMEM; } static int fib_insert_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *new, struct fib_alias *fa, t_key key) { if (!l) return fib_insert_node(t, tp, new, key); if (fa) { hlist_add_before_rcu(&new->fa_list, &fa->fa_list); } else { struct fib_alias *last; hlist_for_each_entry(last, &l->leaf, fa_list) { if (new->fa_slen < last->fa_slen) break; if ((new->fa_slen == last->fa_slen) && (new->tb_id > last->tb_id)) break; fa = last; } if (fa) hlist_add_behind_rcu(&new->fa_list, &fa->fa_list); else hlist_add_head_rcu(&new->fa_list, &l->leaf); } /* if we added to the tail node then we need to update slen */ if (l->slen < new->fa_slen) { l->slen = new->fa_slen; node_push_suffix(tp, new->fa_slen); } return 0; } static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) { if (plen > KEYLENGTH) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); return false; } if ((plen < KEYLENGTH) && (key << plen)) { NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length"); return false; } return true; } static void fib_remove_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *old); /* Caller must hold RTNL. */ int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack) { struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; u16 nlflags = NLM_F_EXCL; struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; dscp_t dscp; u32 key; int err; key = ntohl(cfg->fc_dst); if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); fi = fib_create_info(cfg, extack); if (IS_ERR(fi)) { err = PTR_ERR(fi); goto err; } dscp = cfg->fc_dscp; l = fib_find_node(t, &tp, key); fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority, tb->tb_id, false) : NULL; /* Now fa, if non-NULL, points to the first fib alias * with the same keys [prefix,dscp,priority], if such key already * exists or to the node before which we will insert new one. * * If fa is NULL, we will need to allocate a new one and * insert to the tail of the section matching the suffix length * of the new alias. */ if (fa && fa->fa_dscp == dscp && fa->fa_info->fib_priority == fi->fib_priority) { struct fib_alias *fa_first, *fa_match; err = -EEXIST; if (cfg->fc_nlflags & NLM_F_EXCL) goto out; nlflags &= ~NLM_F_EXCL; /* We have 2 goals: * 1. Find exact match for type, scope, fib_info to avoid * duplicate routes * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it */ fa_match = NULL; fa_first = fa; hlist_for_each_entry_from(fa, fa_list) { if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || (fa->fa_dscp != dscp)) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; if (fa->fa_type == cfg->fc_type && fa->fa_info == fi) { fa_match = fa; break; } } if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; nlflags |= NLM_F_REPLACE; fa = fa_first; if (fa_match) { if (fa == fa_match) err = 0; goto out; } err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) goto out; fi_drop = fa->fa_info; new_fa->fa_dscp = fa->fa_dscp; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; new_fa->offload = 0; new_fa->trap = 0; new_fa->offload_failed = 0; hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { enum fib_event_type fib_event; fib_event = FIB_EVENT_ENTRY_REPLACE; err = call_fib_entry_notifiers(net, fib_event, key, plen, new_fa, extack); if (err) { hlist_replace_rcu(&new_fa->fa_list, &fa->fa_list); goto out_free_new_fa; } } rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); alias_free_mem_rcu(fa); fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); goto succeeded; } /* Error if we find a perfect match which * uses the same scope, type, and nexthop * information. */ if (fa_match) goto out; if (cfg->fc_nlflags & NLM_F_APPEND) nlflags |= NLM_F_APPEND; else fa = fa_first; } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; nlflags |= NLM_F_CREATE; err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) goto out; new_fa->fa_info = fi; new_fa->fa_dscp = dscp; new_fa->fa_type = cfg->fc_type; new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; new_fa->offload = 0; new_fa->trap = 0; new_fa->offload_failed = 0; /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) goto out_free_new_fa; /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); if (WARN_ON_ONCE(!l)) { err = -ENOENT; goto out_free_new_fa; } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { enum fib_event_type fib_event; fib_event = FIB_EVENT_ENTRY_REPLACE; err = call_fib_entry_notifiers(net, fib_event, key, plen, new_fa, extack); if (err) goto out_remove_new_fa; } if (!plen) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: return 0; out_remove_new_fa: fib_remove_alias(t, tp, l, new_fa); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: fib_release_info(fi); err: return err; } static inline t_key prefix_mismatch(t_key key, struct key_vector *n) { t_key prefix = n->key; return (key ^ prefix) & (prefix | -prefix); } bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, const struct flowi4 *flp) { if (nhc->nhc_flags & RTNH_F_DEAD) return false; if (ip_ignore_linkdown(nhc->nhc_dev) && nhc->nhc_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) return false; return true; } /* should be called with rcu_read_lock */ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats = t->stats; #endif const t_key key = ntohl(flp->daddr); struct key_vector *n, *pn; struct fib_alias *fa; unsigned long index; t_key cindex; pn = t->kv; cindex = 0; n = get_child_rcu(pn, cindex); if (!n) { trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN); return -EAGAIN; } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->gets); #endif /* Step 1: Travel to the longest prefix match in the trie */ for (;;) { index = get_cindex(key, n); /* This bit of code is a bit tricky but it combines multiple * checks into a single check. The prefix consists of the * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. * if (index >= (1ul << bits)) * we have a mismatch in skip bits and failed * else * we know the value is cindex * * This check is safe even if bits == KEYLENGTH due to the * fact that we can only allocate a node with 32 bits if a * long is greater than 32 bits. */ if (index >= (1ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ if (IS_LEAF(n)) goto found; /* only record pn and cindex if we are going to be chopping * bits later. Otherwise we are just wasting cycles. */ if (n->slen > n->pos) { pn = n; cindex = index; } n = get_child_rcu(n, index); if (unlikely(!n)) goto backtrace; } /* Step 2: Sort out leaves and begin backtracing for longest prefix */ for (;;) { /* record the pointer where our next node pointer is stored */ struct key_vector __rcu **cptr = n->tnode; /* This test verifies that none of the bits that differ * between the key and the prefix exist in the region of * the lsb and higher in the prefix. */ if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) goto backtrace; /* exit out and process leaf */ if (unlikely(IS_LEAF(n))) break; /* Don't bother recording parent info. Since we are in * prefix match mode we will have to come back to wherever * we started this traversal anyway */ while ((n = rcu_dereference(*cptr)) == NULL) { backtrace: #ifdef CONFIG_IP_FIB_TRIE_STATS if (!n) this_cpu_inc(stats->null_node_hit); #endif /* If we are at cindex 0 there are no more bits for * us to strip at this level so we must ascend back * up one level to see if there are any more bits to * be stripped there. */ while (!cindex) { t_key pkey = pn->key; /* If we don't have a parent then there is * nothing for us to do as we do not have any * further nodes to parse. */ if (IS_TRIE(pn)) { trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN); return -EAGAIN; } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif /* Get Child's index */ pn = node_parent_rcu(pn); cindex = get_index(pkey, pn); } /* strip the least significant bit from the cindex */ cindex &= cindex - 1; /* grab pointer for next child node */ cptr = &pn->tnode[cindex]; } } found: /* this line carries forward the xor from earlier in the function */ index = key ^ n->key; /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; struct fib_nh_common *nhc; int nhsel, err; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { if (index >= (1ul << fa->fa_slen)) continue; } if (fa->fa_dscp && inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos) continue; /* Paired with WRITE_ONCE() in fib_release_info() */ if (READ_ONCE(fi->fib_dead)) continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, NULL, err); return err; } if (fi->fib_flags & RTNH_F_DEAD) continue; if (unlikely(fi->nh)) { if (nexthop_is_blackhole(fi->nh)) { err = fib_props[RTN_BLACKHOLE].error; goto out_reject; } nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, &nhsel); if (nhc) goto set_result; goto miss; } for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { nhc = fib_info_nhc(fi, nhsel); if (!fib_lookup_good_nhc(nhc, fib_flags, flp)) continue; set_result: if (!(fib_flags & FIB_LOOKUP_NOREF)) refcount_inc(&fi->fib_clntref); res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &n->leaf; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; } } miss: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_miss); #endif goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); static void fib_remove_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *old) { /* record the location of the previous list_info entry */ struct hlist_node **pprev = old->fa_list.pprev; struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next); /* remove the fib_alias from the list */ hlist_del_rcu(&old->fa_list); /* if we emptied the list this leaf will be freed and we can sort * out parent suffix lengths as a part of trie_rebalance */ if (hlist_empty(&l->leaf)) { if (tp->slen == l->slen) node_pull_suffix(tp, tp->pos); put_child_root(tp, l->key, NULL); node_free(l); trie_rebalance(t, tp); return; } /* only access fa if it is pointing at the last valid hlist_node */ if (*pprev) return; /* update the trie with the latest suffix length */ l->slen = fa->fa_slen; node_pull_suffix(tp, fa->fa_slen); } static void fib_notify_alias_delete(struct net *net, u32 key, struct hlist_head *fah, struct fib_alias *fa_to_delete, struct netlink_ext_ack *extack) { struct fib_alias *fa_next, *fa_to_notify; u32 tb_id = fa_to_delete->tb_id; u8 slen = fa_to_delete->fa_slen; enum fib_event_type fib_event; /* Do not notify if we do not care about the route. */ if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete) return; /* Determine if the route should be replaced by the next route in the * list. */ fa_next = hlist_entry_safe(fa_to_delete->fa_list.next, struct fib_alias, fa_list); if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) { fib_event = FIB_EVENT_ENTRY_REPLACE; fa_to_notify = fa_next; } else { fib_event = FIB_EVENT_ENTRY_DEL; fa_to_notify = fa_to_delete; } call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen, fa_to_notify, extack); } /* Caller must hold RTNL. */ int fib_table_delete(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; struct key_vector *l, *tp; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; dscp_t dscp; u32 key; key = ntohl(cfg->fc_dst); if (!fib_valid_key_len(key, plen, extack)) return -EINVAL; l = fib_find_node(t, &tp, key); if (!l) return -ESRCH; dscp = cfg->fc_dscp; fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false); if (!fa) return -ESRCH; pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen, inet_dscp_to_dsfield(dscp), t); fa_to_delete = NULL; hlist_for_each_entry_from(fa, fa_list) { struct fib_info *fi = fa->fa_info; if ((fa->fa_slen != slen) || (fa->tb_id != tb->tb_id) || (fa->fa_dscp != dscp)) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || fa->fa_info->fib_scope == cfg->fc_scope) && (!cfg->fc_prefsrc || fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && fib_nh_match(net, cfg, fi, extack) == 0 && fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; } } if (!fa_to_delete) return -ESRCH; fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); if (!plen) tb->tb_num_default--; fib_remove_alias(t, tp, l, fa_to_delete); if (fa_to_delete->fa_state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa_to_delete->fa_info); alias_free_mem_rcu(fa_to_delete); return 0; } /* Scan for the next leaf starting at the provided key value */ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) { struct key_vector *pn, *n = *tn; unsigned long cindex; /* this loop is meant to try and find the key in the trie */ do { /* record parent and next child index */ pn = n; cindex = (key > pn->key) ? get_index(key, pn) : 0; if (cindex >> pn->bits) break; /* descend into the next child */ n = get_child_rcu(pn, cindex++); if (!n) break; /* guarantee forward progress on the keys */ if (IS_LEAF(n) && (n->key >= key)) goto found; } while (IS_TNODE(n)); /* this loop will search for the next leaf with a greater key */ while (!IS_TRIE(pn)) { /* if we exhausted the parent node we will need to climb */ if (cindex >= (1ul << pn->bits)) { t_key pkey = pn->key; pn = node_parent_rcu(pn); cindex = get_index(pkey, pn) + 1; continue; } /* grab the next available node */ n = get_child_rcu(pn, cindex++); if (!n) continue; /* no need to compare keys since we bumped the index */ if (IS_LEAF(n)) goto found; /* Rescan start scanning in new node */ pn = n; cindex = 0; } *tn = pn; return NULL; /* Root of trie */ found: /* if we are at the limit for keys just return NULL for the tnode */ *tn = pn; return n; } static void fib_trie_free(struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; unsigned long cindex = 1; struct hlist_node *tmp; struct fib_alias *fa; /* walk trie in reverse order and free everything */ for (;;) { struct key_vector *n; if (!(cindex--)) { t_key pkey = pn->key; if (IS_TRIE(pn)) break; n = pn; pn = node_parent(pn); /* drop emptied tnode */ put_child_root(pn, n->key, NULL); node_free(n); cindex = get_index(pkey, pn); continue; } /* grab the next available node */ n = get_child(pn, cindex); if (!n) continue; if (IS_TNODE(n)) { /* record pn and cindex for leaf walking */ pn = n; cindex = 1ul << n->bits; continue; } hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { hlist_del_rcu(&fa->fa_list); alias_free_mem_rcu(fa); } put_child_root(pn, n->key, NULL); node_free(n); } #ifdef CONFIG_IP_FIB_TRIE_STATS free_percpu(t->stats); #endif kfree(tb); } struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) { struct trie *ot = (struct trie *)oldtb->tb_data; struct key_vector *l, *tp = ot->kv; struct fib_table *local_tb; struct fib_alias *fa; struct trie *lt; t_key key = 0; if (oldtb->tb_data == oldtb->__data) return oldtb; local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL); if (!local_tb) return NULL; lt = (struct trie *)local_tb->tb_data; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { struct key_vector *local_l = NULL, *local_tp; hlist_for_each_entry(fa, &l->leaf, fa_list) { struct fib_alias *new_fa; if (local_tb->tb_id != fa->tb_id) continue; /* clone fa for new local table */ new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) goto out; memcpy(new_fa, fa, sizeof(*fa)); /* insert clone into table */ if (!local_l) local_l = fib_find_node(lt, &local_tp, l->key); if (fib_insert_alias(lt, local_tp, local_l, new_fa, NULL, l->key)) { kmem_cache_free(fn_alias_kmem, new_fa); goto out; } } /* stop loop if key wrapped back to 0 */ key = l->key + 1; if (key < l->key) break; } return local_tb; out: fib_trie_free(local_tb); return NULL; } /* Caller must hold RTNL */ void fib_table_flush_external(struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; unsigned long cindex = 1; struct hlist_node *tmp; struct fib_alias *fa; /* walk trie in reverse order */ for (;;) { unsigned char slen = 0; struct key_vector *n; if (!(cindex--)) { t_key pkey = pn->key; /* cannot resize the trie vector */ if (IS_TRIE(pn)) break; /* update the suffix to address pulled leaves */ if (pn->slen > pn->pos) update_suffix(pn); /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); continue; } /* grab the next available node */ n = get_child(pn, cindex); if (!n) continue; if (IS_TNODE(n)) { /* record pn and cindex for leaf walking */ pn = n; cindex = 1ul << n->bits; continue; } hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { /* if alias was cloned to local then we just * need to remove the local copy from main */ if (tb->tb_id != fa->tb_id) { hlist_del_rcu(&fa->fa_list); alias_free_mem_rcu(fa); continue; } /* record local slen */ slen = fa->fa_slen; } /* update leaf slen */ n->slen = slen; if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); } } } /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct nl_info info = { .nl_net = net }; struct key_vector *pn = t->kv; unsigned long cindex = 1; struct hlist_node *tmp; struct fib_alias *fa; int found = 0; /* walk trie in reverse order */ for (;;) { unsigned char slen = 0; struct key_vector *n; if (!(cindex--)) { t_key pkey = pn->key; /* cannot resize the trie vector */ if (IS_TRIE(pn)) break; /* update the suffix to address pulled leaves */ if (pn->slen > pn->pos) update_suffix(pn); /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); continue; } /* grab the next available node */ n = get_child(pn, cindex); if (!n) continue; if (IS_TNODE(n)) { /* record pn and cindex for leaf walking */ pn = n; cindex = 1ul << n->bits; continue; } hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; if (!fi || tb->tb_id != fa->tb_id || (!(fi->fib_flags & RTNH_F_DEAD) && !fib_props[fa->fa_type].error)) { slen = fa->fa_slen; continue; } /* Do not flush error routes if network namespace is * not being dismantled */ if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } fib_notify_alias_delete(net, n->key, &n->leaf, fa, NULL); if (fi->pfsrc_removed) rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa, KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); found++; } /* update leaf slen */ n->slen = slen; if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); } } pr_debug("trie_flush found=%d\n", found); return found; } /* derived from fib_trie_free */ static void __fib_info_notify_update(struct net *net, struct fib_table *tb, struct nl_info *info) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; unsigned long cindex = 1; struct fib_alias *fa; for (;;) { struct key_vector *n; if (!(cindex--)) { t_key pkey = pn->key; if (IS_TRIE(pn)) break; pn = node_parent(pn); cindex = get_index(pkey, pn); continue; } /* grab the next available node */ n = get_child(pn, cindex); if (!n) continue; if (IS_TNODE(n)) { /* record pn and cindex for leaf walking */ pn = n; cindex = 1ul << n->bits; continue; } hlist_for_each_entry(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id) continue; rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, KEYLENGTH - fa->fa_slen, tb->tb_id, info, NLM_F_REPLACE); } } } void fib_info_notify_update(struct net *net, struct nl_info *info) { unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist, lockdep_rtnl_is_held()) __fib_info_notify_update(net, tb, info); } } static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, struct notifier_block *nb, struct netlink_ext_ack *extack) { struct fib_alias *fa; int last_slen = -1; int err; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; if (!fi) continue; /* local and main table can share the same trie, * so don't notify twice for the same entry. */ if (tb->tb_id != fa->tb_id) continue; if (fa->fa_slen == last_slen) continue; last_slen = fa->fa_slen; err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE, l->key, KEYLENGTH - fa->fa_slen, fa, extack); if (err) return err; } return 0; } static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, struct netlink_ext_ack *extack) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; int err; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { err = fib_leaf_notify(l, tb, nb, extack); if (err) return err; key = l->key + 1; /* stop in case of wrap around */ if (key < l->key) break; } return 0; } int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { unsigned int h; int err; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { err = fib_table_notify(tb, nb, extack); if (err) return err; } } return 0; } static void __trie_free_rcu(struct rcu_head *head) { struct fib_table *tb = container_of(head, struct fib_table, rcu); #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie *t = (struct trie *)tb->tb_data; if (tb->tb_data == tb->__data) free_percpu(t->stats); #endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } void fib_free_table(struct fib_table *tb) { call_rcu(&tb->rcu, __trie_free_rcu); } static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter) { unsigned int flags = NLM_F_MULTI; __be32 xkey = htonl(l->key); int i, s_i, i_fa, s_fa, err; struct fib_alias *fa; if (filter->filter_set || !filter->dump_exceptions || !filter->dump_routes) flags |= NLM_F_DUMP_FILTERED; s_i = cb->args[4]; s_fa = cb->args[5]; i = 0; /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; if (i < s_i) goto next; i_fa = 0; if (tb->tb_id != fa->tb_id) goto next; if (filter->filter_set) { if (filter->rt_type && fa->fa_type != filter->rt_type) goto next; if ((filter->protocol && fi->fib_protocol != filter->protocol)) goto next; if (filter->dev && !fib_info_nh_uses_dev(fi, filter->dev)) goto next; } if (filter->dump_routes) { if (!s_fa) { struct fib_rt_info fri; fri.fi = fi; fri.tb_id = tb->tb_id; fri.dst = xkey; fri.dst_len = KEYLENGTH - fa->fa_slen; fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); fri.offload_failed = READ_ONCE(fa->offload_failed); err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, &fri, flags); if (err < 0) goto stop; } i_fa++; } if (filter->dump_exceptions) { err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi, &i_fa, s_fa, flags); if (err < 0) goto stop; } next: i++; } cb->args[4] = i; return skb->len; stop: cb->args[4] = i; cb->args[5] = i_fa; return err; } /* rcu_read_lock needs to be hold by caller from readside */ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; /* Dump starting at last key. * Note: 0.0.0.0/0 (ie default) is first key. */ int count = cb->args[2]; t_key key = cb->args[3]; /* First time here, count and key are both always 0. Count > 0 * and key == 0 means the dump has wrapped around and we are done. */ if (count && !key) return skb->len; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { int err; err = fn_trie_dump_leaf(l, tb, skb, cb, filter); if (err < 0) { cb->args[3] = key; cb->args[2] = count; return err; } ++count; key = l->key + 1; memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); /* stop loop if key wrapped back to 0 */ if (key < l->key) break; } cb->args[3] = key; cb->args[2] = count; return skb->len; } void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", LEAF_SIZE, 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); } struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) { struct fib_table *tb; struct trie *t; size_t sz = sizeof(*tb); if (!alias) sz += sizeof(struct trie); tb = kzalloc(sz, GFP_KERNEL); if (!tb) return NULL; tb->tb_id = id; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); if (alias) return tb; t = (struct trie *) tb->tb_data; t->kv[0].pos = KEYLENGTH; t->kv[0].slen = KEYLENGTH; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats = alloc_percpu(struct trie_use_stats); if (!t->stats) { kfree(tb); tb = NULL; } #endif return tb; } #ifdef CONFIG_PROC_FS /* Depth first Trie walk iterator */ struct fib_trie_iter { struct seq_net_private p; struct fib_table *tb; struct key_vector *tnode; unsigned int index; unsigned int depth; }; static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) { unsigned long cindex = iter->index; struct key_vector *pn = iter->tnode; t_key pkey; pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); while (!IS_TRIE(pn)) { while (cindex < child_length(pn)) { struct key_vector *n = get_child_rcu(pn, cindex++); if (!n) continue; if (IS_LEAF(n)) { iter->tnode = pn; iter->index = cindex; } else { /* push down one level */ iter->tnode = n; iter->index = 0; ++iter->depth; } return n; } /* Current node exhausted, pop back up */ pkey = pn->key; pn = node_parent_rcu(pn); cindex = get_index(pkey, pn) + 1; --iter->depth; } /* record root node so further searches know we are done */ iter->tnode = pn; iter->index = 0; return NULL; } static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { struct key_vector *n, *pn; if (!t) return NULL; pn = t->kv; n = rcu_dereference(pn->tnode[0]); if (!n) return NULL; if (IS_TNODE(n)) { iter->tnode = n; iter->index = 0; iter->depth = 1; } else { iter->tnode = pn; iter->index = 0; iter->depth = 0; } return n; } static void trie_collect_stats(struct trie *t, struct trie_stat *s) { struct key_vector *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { struct fib_alias *fa; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) ++s->prefixes; } else { s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; s->nullpointers += tn_info(n)->empty_children; } } rcu_read_unlock(); } /* * This outputs /proc/net/fib_triestats */ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { unsigned int i, max, pointers, bytes, avdepth; if (stat->leaves) avdepth = stat->totdepth*100 / stat->leaves; else avdepth = 0; seq_printf(seq, "\tAver depth: %u.%02d\n", avdepth / 100, avdepth % 100); seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); bytes = LEAF_SIZE * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct fib_alias) * stat->prefixes; seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); bytes += TNODE_SIZE(0) * stat->tnodes; max = MAX_STAT_DEPTH; while (max > 0 && stat->nodesizes[max-1] == 0) max--; pointers = 0; for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; } seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); bytes += sizeof(struct key_vector *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } #ifdef CONFIG_IP_FIB_TRIE_STATS static void trie_show_usage(struct seq_file *seq, const struct trie_use_stats __percpu *stats) { struct trie_use_stats s = { 0 }; int cpu; /* loop through all of the CPUs and gather up the stats */ for_each_possible_cpu(cpu) { const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); s.gets += pcpu->gets; s.backtrack += pcpu->backtrack; s.semantic_match_passed += pcpu->semantic_match_passed; s.semantic_match_miss += pcpu->semantic_match_miss; s.null_node_hit += pcpu->null_node_hit; s.resize_node_skipped += pcpu->resize_node_skipped; } seq_printf(seq, "\nCounters:\n---------\n"); seq_printf(seq, "gets = %u\n", s.gets); seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", s.semantic_match_passed); seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); seq_printf(seq, "null node hit= %u\n", s.null_node_hit); seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); } #endif /* CONFIG_IP_FIB_TRIE_STATS */ static void fib_table_print(struct seq_file *seq, struct fib_table *tb) { if (tb->tb_id == RT_TABLE_LOCAL) seq_puts(seq, "Local:\n"); else if (tb->tb_id == RT_TABLE_MAIN) seq_puts(seq, "Main:\n"); else seq_printf(seq, "Id %d:\n", tb->tb_id); } static int fib_triestat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; unsigned int h; seq_printf(seq, "Basic info: size of leaf:" " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; if (!t) continue; fib_table_print(seq, tb); trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS trie_show_usage(seq, t->stats); #endif } cond_resched_rcu(); } rcu_read_unlock(); return 0; } static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); loff_t idx = 0; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct key_vector *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); n; n = fib_trie_get_next(iter)) if (pos == idx++) { iter->tb = tb; return n; } } } return NULL; } static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return fib_trie_get_idx(seq, *pos); } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; struct key_vector *n; ++*pos; /* next node in same table */ n = fib_trie_get_next(iter); if (n) return n; /* walk rest of this hash chain */ h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { tb = hlist_entry(tb_node, struct fib_table, tb_hlist); n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; } /* new hash chain */ while (++h < FIB_TABLE_HASHSZ) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; } } return NULL; found: iter->tb = tb; return n; } static void fib_trie_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void seq_indent(struct seq_file *seq, int n) { while (n-- > 0) seq_puts(seq, " "); } static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) { switch (s) { case RT_SCOPE_UNIVERSE: return "universe"; case RT_SCOPE_SITE: return "site"; case RT_SCOPE_LINK: return "link"; case RT_SCOPE_HOST: return "host"; case RT_SCOPE_NOWHERE: return "nowhere"; default: snprintf(buf, len, "scope=%d", s); return buf; } } static const char *const rtn_type_names[__RTN_MAX] = { [RTN_UNSPEC] = "UNSPEC", [RTN_UNICAST] = "UNICAST", [RTN_LOCAL] = "LOCAL", [RTN_BROADCAST] = "BROADCAST", [RTN_ANYCAST] = "ANYCAST", [RTN_MULTICAST] = "MULTICAST", [RTN_BLACKHOLE] = "BLACKHOLE", [RTN_UNREACHABLE] = "UNREACHABLE", [RTN_PROHIBIT] = "PROHIBIT", [RTN_THROW] = "THROW", [RTN_NAT] = "NAT", [RTN_XRESOLVE] = "XRESOLVE", }; static inline const char *rtn_type(char *buf, size_t len, unsigned int t) { if (t < __RTN_MAX && rtn_type_names[t]) return rtn_type_names[t]; snprintf(buf, len, "type %u", t); return buf; } /* Pretty print the trie */ static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; struct key_vector *n = v; if (IS_TRIE(node_parent_rcu(n))) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { __be32 prf = htonl(n->key); seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", &prf, KEYLENGTH - n->pos - n->bits, n->bits, tn_info(n)->full_children, tn_info(n)->empty_children); } else { __be32 val = htonl(n->key); struct fib_alias *fa; seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { char buf1[32], buf2[32]; seq_indent(seq, iter->depth + 1); seq_printf(seq, " /%zu %s %s", KEYLENGTH - fa->fa_slen, rtn_scope(buf1, sizeof(buf1), fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); if (fa->fa_dscp) seq_printf(seq, " tos=%d", inet_dscp_to_dsfield(fa->fa_dscp)); seq_putc(seq, '\n'); } } return 0; } static const struct seq_operations fib_trie_seq_ops = { .start = fib_trie_seq_start, .next = fib_trie_seq_next, .stop = fib_trie_seq_stop, .show = fib_trie_seq_show, }; struct fib_route_iter { struct seq_net_private p; struct fib_table *main_tb; struct key_vector *tnode; loff_t pos; t_key key; }; static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { struct key_vector *l, **tp = &iter->tnode; t_key key; /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { key = iter->key; } else { iter->pos = 1; key = 0; } pos -= iter->pos; while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; l = NULL; /* handle unlikely case of a key wrap */ if (!key) break; } if (l) iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ return l; } static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct fib_route_iter *iter = seq->private; struct fib_table *tb; struct trie *t; rcu_read_lock(); tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); if (!tb) return NULL; iter->main_tb = tb; t = (struct trie *)tb->tb_data; iter->tnode = t->kv; if (*pos != 0) return fib_route_get_idx(iter, *pos); iter->pos = 0; iter->key = KEY_MAX; return SEQ_START_TOKEN; } static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; t_key key = iter->key + 1; ++*pos; /* only allow key of 0 for start of sequence */ if ((v == SEQ_START_TOKEN) || key) l = leaf_walk_rcu(&iter->tnode, key); if (l) { iter->key = l->key; iter->pos++; } else { iter->pos = 0; } return l; } static void fib_route_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi) { unsigned int flags = 0; if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; if (fi) { const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); if (nhc->nhc_gw.ipv4) flags |= RTF_GATEWAY; } if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; return flags; } /* * This outputs /proc/net/route. * The format of the file is not supposed to be changed * and needs to be same as fib_hash output to avoid breaking * legacy utilities */ static int fib_route_seq_show(struct seq_file *seq, void *v) { struct fib_route_iter *iter = seq->private; struct fib_table *tb = iter->main_tb; struct fib_alias *fa; struct key_vector *l = v; __be32 prefix; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" "\tWindow\tIRTT"); return 0; } prefix = htonl(l->key); hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); if ((fa->fa_type == RTN_BROADCAST) || (fa->fa_type == RTN_MULTICAST)) continue; if (fa->tb_id != tb->tb_id) continue; seq_setwidth(seq, 127); if (fi) { struct fib_nh_common *nhc = fib_info_nhc(fi, 0); __be32 gw = 0; if (nhc->nhc_gw_family == AF_INET) gw = nhc->nhc_gw.ipv4; seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", nhc->nhc_dev ? nhc->nhc_dev->name : "*", prefix, gw, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), fi->fib_window, fi->fib_rtt >> 3); } else { seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0); } seq_pad(seq, '\n'); } return 0; } static const struct seq_operations fib_route_seq_ops = { .start = fib_route_seq_start, .next = fib_route_seq_next, .stop = fib_route_seq_stop, .show = fib_route_seq_show, }; int __net_init fib_proc_init(struct net *net) { if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, sizeof(struct fib_trie_iter))) goto out1; if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, fib_triestat_seq_show, NULL)) goto out2; if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops, sizeof(struct fib_route_iter))) goto out3; return 0; out3: remove_proc_entry("fib_triestat", net->proc_net); out2: remove_proc_entry("fib_trie", net->proc_net); out1: return -ENOMEM; } void __net_exit fib_proc_exit(struct net *net) { remove_proc_entry("fib_trie", net->proc_net); remove_proc_entry("fib_triestat", net->proc_net); remove_proc_entry("route", net->proc_net); } #endif /* CONFIG_PROC_FS */ |
22 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM alarmtimer #if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_ALARMTIMER_H #include <linux/alarmtimer.h> #include <linux/rtc.h> #include <linux/tracepoint.h> TRACE_DEFINE_ENUM(ALARM_REALTIME); TRACE_DEFINE_ENUM(ALARM_BOOTTIME); TRACE_DEFINE_ENUM(ALARM_REALTIME_FREEZER); TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER); #define show_alarm_type(type) __print_flags(type, " | ", \ { 1 << ALARM_REALTIME, "REALTIME" }, \ { 1 << ALARM_BOOTTIME, "BOOTTIME" }, \ { 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" }, \ { 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" }) TRACE_EVENT(alarmtimer_suspend, TP_PROTO(ktime_t expires, int flag), TP_ARGS(expires, flag), TP_STRUCT__entry( __field(s64, expires) __field(unsigned char, alarm_type) ), TP_fast_assign( __entry->expires = expires; __entry->alarm_type = flag; ), TP_printk("alarmtimer type:%s expires:%llu", show_alarm_type((1 << __entry->alarm_type)), __entry->expires ) ); DECLARE_EVENT_CLASS(alarm_class, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now), TP_STRUCT__entry( __field(void *, alarm) __field(unsigned char, alarm_type) __field(s64, expires) __field(s64, now) ), TP_fast_assign( __entry->alarm = alarm; __entry->alarm_type = alarm->type; __entry->expires = alarm->node.expires; __entry->now = now; ), TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu", __entry->alarm, show_alarm_type((1 << __entry->alarm_type)), __entry->expires, __entry->now ) ); DEFINE_EVENT(alarm_class, alarmtimer_fired, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); DEFINE_EVENT(alarm_class, alarmtimer_start, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); DEFINE_EVENT(alarm_class, alarmtimer_cancel, TP_PROTO(struct alarm *alarm, ktime_t now), TP_ARGS(alarm, now) ); #endif /* _TRACE_ALARMTIMER_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | /* * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de> * * For licencing details see kernel-base/COPYING */ #include <linux/init.h> #include <linux/ioport.h> #include <linux/export.h> #include <linux/pci.h> #include <asm/acpi.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> #include <asm/pci_x86.h> #include <asm/mpspec.h> #include <asm/setup.h> #include <asm/apic.h> #include <asm/e820/api.h> #include <asm/time.h> #include <asm/irq.h> #include <asm/io_apic.h> #include <asm/hpet.h> #include <asm/memtype.h> #include <asm/tsc.h> #include <asm/iommu.h> #include <asm/mach_traps.h> #include <asm/irqdomain.h> #include <asm/realmode.h> void x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } static int __init iommu_init_noop(void) { return 0; } static void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } void get_rtc_noop(struct timespec64 *now) { } static __initconst const struct of_device_id of_cmos_match[] = { { .compatible = "motorola,mc146818" }, {} }; /* * Allow devicetree configured systems to disable the RTC by setting the * corresponding DT node's status property to disabled. Code is optimized * out for CONFIG_OF=n builds. */ static __init void x86_wallclock_init(void) { struct device_node *node = of_find_matching_node(NULL, of_cmos_match); if (node && !of_device_is_available(node)) { x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; } } /* * The platform setup functions are preset with the default functions * for standard PC hardware. */ struct x86_init_ops x86_init __initdata = { .resources = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, }, .mpparse = { .setup_ioapic_ids = x86_init_noop, .find_smp_config = default_find_smp_config, .get_smp_config = default_get_smp_config, }, .irqs = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init, .create_pci_msi_domain = native_create_pci_msi_domain, }, .oem = { .arch_setup = x86_init_noop, .banner = default_banner, }, .paging = { .pagetable_init = native_pagetable_init, }, .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, .timer_init = hpet_time_init, .wallclock_init = x86_wallclock_init, }, .iommu = { .iommu_init = iommu_init_noop, }, .pci = { .init = x86_default_pci_init, .init_irq = x86_default_pci_init_irq, .fixup_irqs = x86_default_pci_fixup_irqs, }, .hyper = { .init_platform = x86_init_noop, .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .msi_ext_dest_id = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, .init_after_bootmem = x86_init_noop, }, .acpi = { .set_root_pointer = x86_default_set_root_pointer, .get_root_pointer = x86_default_get_root_pointer, .reduced_hw_early_init = acpi_generic_reduced_hw_init, }, }; struct x86_cpuinit_ops x86_cpuinit = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, .parallel_bringup = true, }; static void default_nmi_init(void) { }; static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_cmos_time, .iommu_shutdown = iommu_shutdown_noop, .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, .save_sched_clock_state = tsc_save_sched_clock_state, .restore_sched_clock_state = tsc_restore_sched_clock_state, .realmode_reserve = reserve_real_mode, .realmode_init = init_real_mode, .hyper.pin_vcpu = x86_op_int_noop, .hyper.is_private_mmio = is_private_mmio_noop, .guest = { .enc_status_change_prepare = enc_status_change_prepare_noop, .enc_status_change_finish = enc_status_change_finish_noop, .enc_tlb_flush_required = enc_tlb_flush_required_noop, .enc_cache_flush_required = enc_cache_flush_required_noop, }, }; EXPORT_SYMBOL_GPL(x86_platform); struct x86_apic_ops x86_apic_ops __ro_after_init = { .io_apic_read = native_io_apic_read, .restore = native_restore_boot_irq_mode, }; |
14 14 4 4 10 20 7 4 4 4 4 4 4 4 4 2 3 3 13 7 9 1 13 13 6 7 13 13 13 1 12 8 5 1 12 8 5 1 1 13 1 6 6 1 11 7 5 4 1 4 3 3 4 4 4 4 1 4 2 3 1 1 3 1 1 1 4 2 2 7 7 1 5 2 2 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 18 1 1 1 1 2 1 1 4 2 1 1 1 2 1 1 1 8 10 2 2 25 3 12 2 8 1 1 1 1 2 2 1 6 5 1 6 6 6 8 8 4 4 4 4 1 3 12 1 11 11 10 10 1 11 10 10 5 5 6 6 5 5 6 6 5 6 6 6 6 6 6 6 6 3 3 3 3 3 3 3 3 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Abstract layer for MIDI v1.0 stream * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <sound/core.h> #include <linux/major.h> #include <linux/init.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/wait.h> #include <linux/mutex.h> #include <linux/module.h> #include <linux/delay.h> #include <linux/mm.h> #include <linux/nospec.h> #include <sound/rawmidi.h> #include <sound/info.h> #include <sound/control.h> #include <sound/minors.h> #include <sound/initval.h> #include <sound/ump.h> MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Midlevel RawMidi code for ALSA."); MODULE_LICENSE("GPL"); #ifdef CONFIG_SND_OSSEMUL static int midi_map[SNDRV_CARDS]; static int amidi_map[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 1}; module_param_array(midi_map, int, NULL, 0444); MODULE_PARM_DESC(midi_map, "Raw MIDI device number assigned to 1st OSS device."); module_param_array(amidi_map, int, NULL, 0444); MODULE_PARM_DESC(amidi_map, "Raw MIDI device number assigned to 2nd OSS device."); #endif /* CONFIG_SND_OSSEMUL */ static int snd_rawmidi_dev_free(struct snd_device *device); static int snd_rawmidi_dev_register(struct snd_device *device); static int snd_rawmidi_dev_disconnect(struct snd_device *device); static LIST_HEAD(snd_rawmidi_devices); static DEFINE_MUTEX(register_mutex); #define rmidi_err(rmidi, fmt, args...) \ dev_err((rmidi)->dev, fmt, ##args) #define rmidi_warn(rmidi, fmt, args...) \ dev_warn((rmidi)->dev, fmt, ##args) #define rmidi_dbg(rmidi, fmt, args...) \ dev_dbg((rmidi)->dev, fmt, ##args) struct snd_rawmidi_status32 { s32 stream; s32 tstamp_sec; /* Timestamp */ s32 tstamp_nsec; u32 avail; /* available bytes */ u32 xruns; /* count of overruns since last status (in bytes) */ unsigned char reserved[16]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_IOCTL_STATUS32 _IOWR('W', 0x20, struct snd_rawmidi_status32) struct snd_rawmidi_status64 { int stream; u8 rsvd[4]; /* alignment */ s64 tstamp_sec; /* Timestamp */ s64 tstamp_nsec; size_t avail; /* available bytes */ size_t xruns; /* count of overruns since last status (in bytes) */ unsigned char reserved[16]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_IOCTL_STATUS64 _IOWR('W', 0x20, struct snd_rawmidi_status64) #define rawmidi_is_ump(rmidi) \ (IS_ENABLED(CONFIG_SND_UMP) && ((rmidi)->info_flags & SNDRV_RAWMIDI_INFO_UMP)) static struct snd_rawmidi *snd_rawmidi_search(struct snd_card *card, int device) { struct snd_rawmidi *rawmidi; list_for_each_entry(rawmidi, &snd_rawmidi_devices, list) if (rawmidi->card == card && rawmidi->device == device) return rawmidi; return NULL; } static inline unsigned short snd_rawmidi_file_flags(struct file *file) { switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_WRITE: return SNDRV_RAWMIDI_LFLG_OUTPUT; case FMODE_READ: return SNDRV_RAWMIDI_LFLG_INPUT; default: return SNDRV_RAWMIDI_LFLG_OPEN; } } static inline bool __snd_rawmidi_ready(struct snd_rawmidi_runtime *runtime) { return runtime->avail >= runtime->avail_min; } static bool snd_rawmidi_ready(struct snd_rawmidi_substream *substream) { unsigned long flags; bool ready; spin_lock_irqsave(&substream->lock, flags); ready = __snd_rawmidi_ready(substream->runtime); spin_unlock_irqrestore(&substream->lock, flags); return ready; } static inline int snd_rawmidi_ready_append(struct snd_rawmidi_substream *substream, size_t count) { struct snd_rawmidi_runtime *runtime = substream->runtime; return runtime->avail >= runtime->avail_min && (!substream->append || runtime->avail >= count); } static void snd_rawmidi_input_event_work(struct work_struct *work) { struct snd_rawmidi_runtime *runtime = container_of(work, struct snd_rawmidi_runtime, event_work); if (runtime->event) runtime->event(runtime->substream); } /* buffer refcount management: call with substream->lock held */ static inline void snd_rawmidi_buffer_ref(struct snd_rawmidi_runtime *runtime) { runtime->buffer_ref++; } static inline void snd_rawmidi_buffer_unref(struct snd_rawmidi_runtime *runtime) { runtime->buffer_ref--; } static void snd_rawmidi_buffer_ref_sync(struct snd_rawmidi_substream *substream) { int loop = HZ; spin_lock_irq(&substream->lock); while (substream->runtime->buffer_ref) { spin_unlock_irq(&substream->lock); if (!--loop) { rmidi_err(substream->rmidi, "Buffer ref sync timeout\n"); return; } schedule_timeout_uninterruptible(1); spin_lock_irq(&substream->lock); } spin_unlock_irq(&substream->lock); } static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); if (!runtime) return -ENOMEM; runtime->substream = substream; init_waitqueue_head(&runtime->sleep); INIT_WORK(&runtime->event_work, snd_rawmidi_input_event_work); runtime->event = NULL; runtime->buffer_size = PAGE_SIZE; runtime->avail_min = 1; if (substream->stream == SNDRV_RAWMIDI_STREAM_INPUT) runtime->avail = 0; else runtime->avail = runtime->buffer_size; runtime->buffer = kvzalloc(runtime->buffer_size, GFP_KERNEL); if (!runtime->buffer) { kfree(runtime); return -ENOMEM; } runtime->appl_ptr = runtime->hw_ptr = 0; substream->runtime = runtime; if (rawmidi_is_ump(substream->rmidi)) runtime->align = 3; return 0; } /* get the current alignment (either 0 or 3) */ static inline int get_align(struct snd_rawmidi_runtime *runtime) { if (IS_ENABLED(CONFIG_SND_UMP)) return runtime->align; else return 0; } /* get the trimmed size with the current alignment */ #define get_aligned_size(runtime, size) ((size) & ~get_align(runtime)) static int snd_rawmidi_runtime_free(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime = substream->runtime; kvfree(runtime->buffer); kfree(runtime); substream->runtime = NULL; return 0; } static inline void snd_rawmidi_output_trigger(struct snd_rawmidi_substream *substream, int up) { if (!substream->opened) return; substream->ops->trigger(substream, up); } static void snd_rawmidi_input_trigger(struct snd_rawmidi_substream *substream, int up) { if (!substream->opened) return; substream->ops->trigger(substream, up); if (!up) cancel_work_sync(&substream->runtime->event_work); } static void __reset_runtime_ptrs(struct snd_rawmidi_runtime *runtime, bool is_input) { runtime->drain = 0; runtime->appl_ptr = runtime->hw_ptr = 0; runtime->avail = is_input ? 0 : runtime->buffer_size; } static void reset_runtime_ptrs(struct snd_rawmidi_substream *substream, bool is_input) { unsigned long flags; spin_lock_irqsave(&substream->lock, flags); if (substream->opened && substream->runtime) __reset_runtime_ptrs(substream->runtime, is_input); spin_unlock_irqrestore(&substream->lock, flags); } int snd_rawmidi_drop_output(struct snd_rawmidi_substream *substream) { snd_rawmidi_output_trigger(substream, 0); reset_runtime_ptrs(substream, false); return 0; } EXPORT_SYMBOL(snd_rawmidi_drop_output); int snd_rawmidi_drain_output(struct snd_rawmidi_substream *substream) { int err = 0; long timeout; struct snd_rawmidi_runtime *runtime; spin_lock_irq(&substream->lock); runtime = substream->runtime; if (!substream->opened || !runtime || !runtime->buffer) { err = -EINVAL; } else { snd_rawmidi_buffer_ref(runtime); runtime->drain = 1; } spin_unlock_irq(&substream->lock); if (err < 0) return err; timeout = wait_event_interruptible_timeout(runtime->sleep, (runtime->avail >= runtime->buffer_size), 10*HZ); spin_lock_irq(&substream->lock); if (signal_pending(current)) err = -ERESTARTSYS; if (runtime->avail < runtime->buffer_size && !timeout) { rmidi_warn(substream->rmidi, "rawmidi drain error (avail = %li, buffer_size = %li)\n", (long)runtime->avail, (long)runtime->buffer_size); err = -EIO; } runtime->drain = 0; spin_unlock_irq(&substream->lock); if (err != -ERESTARTSYS) { /* we need wait a while to make sure that Tx FIFOs are empty */ if (substream->ops->drain) substream->ops->drain(substream); else msleep(50); snd_rawmidi_drop_output(substream); } spin_lock_irq(&substream->lock); snd_rawmidi_buffer_unref(runtime); spin_unlock_irq(&substream->lock); return err; } EXPORT_SYMBOL(snd_rawmidi_drain_output); int snd_rawmidi_drain_input(struct snd_rawmidi_substream *substream) { snd_rawmidi_input_trigger(substream, 0); reset_runtime_ptrs(substream, true); return 0; } EXPORT_SYMBOL(snd_rawmidi_drain_input); /* look for an available substream for the given stream direction; * if a specific subdevice is given, try to assign it */ static int assign_substream(struct snd_rawmidi *rmidi, int subdevice, int stream, int mode, struct snd_rawmidi_substream **sub_ret) { struct snd_rawmidi_substream *substream; struct snd_rawmidi_str *s = &rmidi->streams[stream]; static const unsigned int info_flags[2] = { [SNDRV_RAWMIDI_STREAM_OUTPUT] = SNDRV_RAWMIDI_INFO_OUTPUT, [SNDRV_RAWMIDI_STREAM_INPUT] = SNDRV_RAWMIDI_INFO_INPUT, }; if (!(rmidi->info_flags & info_flags[stream])) return -ENXIO; if (subdevice >= 0 && subdevice >= s->substream_count) return -ENODEV; list_for_each_entry(substream, &s->substreams, list) { if (substream->opened) { if (stream == SNDRV_RAWMIDI_STREAM_INPUT || !(mode & SNDRV_RAWMIDI_LFLG_APPEND) || !substream->append) continue; } if (subdevice < 0 || subdevice == substream->number) { *sub_ret = substream; return 0; } } return -EAGAIN; } /* open and do ref-counting for the given substream */ static int open_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int mode) { int err; if (substream->use_count == 0) { err = snd_rawmidi_runtime_create(substream); if (err < 0) return err; err = substream->ops->open(substream); if (err < 0) { snd_rawmidi_runtime_free(substream); return err; } spin_lock_irq(&substream->lock); substream->opened = 1; substream->active_sensing = 0; if (mode & SNDRV_RAWMIDI_LFLG_APPEND) substream->append = 1; substream->pid = get_pid(task_pid(current)); rmidi->streams[substream->stream].substream_opened++; spin_unlock_irq(&substream->lock); } substream->use_count++; return 0; } static void close_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int cleanup); static int rawmidi_open_priv(struct snd_rawmidi *rmidi, int subdevice, int mode, struct snd_rawmidi_file *rfile) { struct snd_rawmidi_substream *sinput = NULL, *soutput = NULL; int err; rfile->input = rfile->output = NULL; if (mode & SNDRV_RAWMIDI_LFLG_INPUT) { err = assign_substream(rmidi, subdevice, SNDRV_RAWMIDI_STREAM_INPUT, mode, &sinput); if (err < 0) return err; } if (mode & SNDRV_RAWMIDI_LFLG_OUTPUT) { err = assign_substream(rmidi, subdevice, SNDRV_RAWMIDI_STREAM_OUTPUT, mode, &soutput); if (err < 0) return err; } if (sinput) { err = open_substream(rmidi, sinput, mode); if (err < 0) return err; } if (soutput) { err = open_substream(rmidi, soutput, mode); if (err < 0) { if (sinput) close_substream(rmidi, sinput, 0); return err; } } rfile->rmidi = rmidi; rfile->input = sinput; rfile->output = soutput; return 0; } /* called from sound/core/seq/seq_midi.c */ int snd_rawmidi_kernel_open(struct snd_rawmidi *rmidi, int subdevice, int mode, struct snd_rawmidi_file *rfile) { int err; if (snd_BUG_ON(!rfile)) return -EINVAL; if (!try_module_get(rmidi->card->module)) return -ENXIO; mutex_lock(&rmidi->open_mutex); err = rawmidi_open_priv(rmidi, subdevice, mode, rfile); mutex_unlock(&rmidi->open_mutex); if (err < 0) module_put(rmidi->card->module); return err; } EXPORT_SYMBOL(snd_rawmidi_kernel_open); static int snd_rawmidi_open(struct inode *inode, struct file *file) { int maj = imajor(inode); struct snd_card *card; int subdevice; unsigned short fflags; int err; struct snd_rawmidi *rmidi; struct snd_rawmidi_file *rawmidi_file = NULL; wait_queue_entry_t wait; if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) return -EINVAL; /* invalid combination */ err = stream_open(inode, file); if (err < 0) return err; if (maj == snd_major) { rmidi = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_RAWMIDI); #ifdef CONFIG_SND_OSSEMUL } else if (maj == SOUND_MAJOR) { rmidi = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_MIDI); #endif } else return -ENXIO; if (rmidi == NULL) return -ENODEV; if (!try_module_get(rmidi->card->module)) { snd_card_unref(rmidi->card); return -ENXIO; } mutex_lock(&rmidi->open_mutex); card = rmidi->card; err = snd_card_file_add(card, file); if (err < 0) goto __error_card; fflags = snd_rawmidi_file_flags(file); if ((file->f_flags & O_APPEND) || maj == SOUND_MAJOR) /* OSS emul? */ fflags |= SNDRV_RAWMIDI_LFLG_APPEND; rawmidi_file = kmalloc(sizeof(*rawmidi_file), GFP_KERNEL); if (rawmidi_file == NULL) { err = -ENOMEM; goto __error; } rawmidi_file->user_pversion = 0; init_waitqueue_entry(&wait, current); add_wait_queue(&rmidi->open_wait, &wait); while (1) { subdevice = snd_ctl_get_preferred_subdevice(card, SND_CTL_SUBDEV_RAWMIDI); err = rawmidi_open_priv(rmidi, subdevice, fflags, rawmidi_file); if (err >= 0) break; if (err == -EAGAIN) { if (file->f_flags & O_NONBLOCK) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&rmidi->open_mutex); schedule(); mutex_lock(&rmidi->open_mutex); if (rmidi->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&rmidi->open_wait, &wait); if (err < 0) { kfree(rawmidi_file); goto __error; } #ifdef CONFIG_SND_OSSEMUL if (rawmidi_file->input && rawmidi_file->input->runtime) rawmidi_file->input->runtime->oss = (maj == SOUND_MAJOR); if (rawmidi_file->output && rawmidi_file->output->runtime) rawmidi_file->output->runtime->oss = (maj == SOUND_MAJOR); #endif file->private_data = rawmidi_file; mutex_unlock(&rmidi->open_mutex); snd_card_unref(rmidi->card); return 0; __error: snd_card_file_remove(card, file); __error_card: mutex_unlock(&rmidi->open_mutex); module_put(rmidi->card->module); snd_card_unref(rmidi->card); return err; } static void close_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int cleanup) { if (--substream->use_count) return; if (cleanup) { if (substream->stream == SNDRV_RAWMIDI_STREAM_INPUT) snd_rawmidi_input_trigger(substream, 0); else { if (substream->active_sensing) { unsigned char buf = 0xfe; /* sending single active sensing message * to shut the device up */ snd_rawmidi_kernel_write(substream, &buf, 1); } if (snd_rawmidi_drain_output(substream) == -ERESTARTSYS) snd_rawmidi_output_trigger(substream, 0); } snd_rawmidi_buffer_ref_sync(substream); } spin_lock_irq(&substream->lock); substream->opened = 0; substream->append = 0; spin_unlock_irq(&substream->lock); substream->ops->close(substream); if (substream->runtime->private_free) substream->runtime->private_free(substream); snd_rawmidi_runtime_free(substream); put_pid(substream->pid); substream->pid = NULL; rmidi->streams[substream->stream].substream_opened--; } static void rawmidi_release_priv(struct snd_rawmidi_file *rfile) { struct snd_rawmidi *rmidi; rmidi = rfile->rmidi; mutex_lock(&rmidi->open_mutex); if (rfile->input) { close_substream(rmidi, rfile->input, 1); rfile->input = NULL; } if (rfile->output) { close_substream(rmidi, rfile->output, 1); rfile->output = NULL; } rfile->rmidi = NULL; mutex_unlock(&rmidi->open_mutex); wake_up(&rmidi->open_wait); } /* called from sound/core/seq/seq_midi.c */ int snd_rawmidi_kernel_release(struct snd_rawmidi_file *rfile) { struct snd_rawmidi *rmidi; if (snd_BUG_ON(!rfile)) return -ENXIO; rmidi = rfile->rmidi; rawmidi_release_priv(rfile); module_put(rmidi->card->module); return 0; } EXPORT_SYMBOL(snd_rawmidi_kernel_release); static int snd_rawmidi_release(struct inode *inode, struct file *file) { struct snd_rawmidi_file *rfile; struct snd_rawmidi *rmidi; struct module *module; rfile = file->private_data; rmidi = rfile->rmidi; rawmidi_release_priv(rfile); kfree(rfile); module = rmidi->card->module; snd_card_file_remove(rmidi->card, file); module_put(module); return 0; } static int snd_rawmidi_info(struct snd_rawmidi_substream *substream, struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; if (substream == NULL) return -ENODEV; rmidi = substream->rmidi; memset(info, 0, sizeof(*info)); info->card = rmidi->card->number; info->device = rmidi->device; info->subdevice = substream->number; info->stream = substream->stream; info->flags = rmidi->info_flags; strcpy(info->id, rmidi->id); strcpy(info->name, rmidi->name); strcpy(info->subname, substream->name); info->subdevices_count = substream->pstr->substream_count; info->subdevices_avail = (substream->pstr->substream_count - substream->pstr->substream_opened); return 0; } static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, struct snd_rawmidi_info __user *_info) { struct snd_rawmidi_info info; int err; err = snd_rawmidi_info(substream, &info); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(struct snd_rawmidi_info))) return -EFAULT; return 0; } static int __snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; rmidi = snd_rawmidi_search(card, info->device); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; if (info->subdevice >= pstr->substream_count) return -ENXIO; list_for_each_entry(substream, &pstr->substreams, list) { if ((unsigned int)substream->number == info->subdevice) return snd_rawmidi_info(substream, info); } return -ENXIO; } int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) { int ret; mutex_lock(®ister_mutex); ret = __snd_rawmidi_info_select(card, info); mutex_unlock(®ister_mutex); return ret; } EXPORT_SYMBOL(snd_rawmidi_info_select); static int snd_rawmidi_info_select_user(struct snd_card *card, struct snd_rawmidi_info __user *_info) { int err; struct snd_rawmidi_info info; if (get_user(info.device, &_info->device)) return -EFAULT; if (get_user(info.stream, &_info->stream)) return -EFAULT; if (get_user(info.subdevice, &_info->subdevice)) return -EFAULT; err = snd_rawmidi_info_select(card, &info); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(struct snd_rawmidi_info))) return -EFAULT; return 0; } static int resize_runtime_buffer(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params, bool is_input) { struct snd_rawmidi_runtime *runtime = substream->runtime; char *newbuf, *oldbuf; unsigned int framing = params->mode & SNDRV_RAWMIDI_MODE_FRAMING_MASK; if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) return -EINVAL; if (framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP && (params->buffer_size & 0x1f) != 0) return -EINVAL; if (params->avail_min < 1 || params->avail_min > params->buffer_size) return -EINVAL; if (params->buffer_size & get_align(runtime)) return -EINVAL; if (params->buffer_size != runtime->buffer_size) { newbuf = kvzalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; spin_lock_irq(&substream->lock); if (runtime->buffer_ref) { spin_unlock_irq(&substream->lock); kvfree(newbuf); return -EBUSY; } oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; __reset_runtime_ptrs(runtime, is_input); spin_unlock_irq(&substream->lock); kvfree(oldbuf); } runtime->avail_min = params->avail_min; return 0; } int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params) { int err; snd_rawmidi_drain_output(substream); mutex_lock(&substream->rmidi->open_mutex); if (substream->append && substream->use_count > 1) err = -EBUSY; else err = resize_runtime_buffer(substream, params, false); if (!err) substream->active_sensing = !params->no_active_sensing; mutex_unlock(&substream->rmidi->open_mutex); return err; } EXPORT_SYMBOL(snd_rawmidi_output_params); int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params) { unsigned int framing = params->mode & SNDRV_RAWMIDI_MODE_FRAMING_MASK; unsigned int clock_type = params->mode & SNDRV_RAWMIDI_MODE_CLOCK_MASK; int err; snd_rawmidi_drain_input(substream); mutex_lock(&substream->rmidi->open_mutex); if (framing == SNDRV_RAWMIDI_MODE_FRAMING_NONE && clock_type != SNDRV_RAWMIDI_MODE_CLOCK_NONE) err = -EINVAL; else if (clock_type > SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW) err = -EINVAL; else if (framing > SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) err = -EINVAL; else err = resize_runtime_buffer(substream, params, true); if (!err) { substream->framing = framing; substream->clock_type = clock_type; } mutex_unlock(&substream->rmidi->open_mutex); return 0; } EXPORT_SYMBOL(snd_rawmidi_input_params); static int snd_rawmidi_output_status(struct snd_rawmidi_substream *substream, struct snd_rawmidi_status64 *status) { struct snd_rawmidi_runtime *runtime = substream->runtime; memset(status, 0, sizeof(*status)); status->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; spin_lock_irq(&substream->lock); status->avail = runtime->avail; spin_unlock_irq(&substream->lock); return 0; } static int snd_rawmidi_input_status(struct snd_rawmidi_substream *substream, struct snd_rawmidi_status64 *status) { struct snd_rawmidi_runtime *runtime = substream->runtime; memset(status, 0, sizeof(*status)); status->stream = SNDRV_RAWMIDI_STREAM_INPUT; spin_lock_irq(&substream->lock); status->avail = runtime->avail; status->xruns = runtime->xruns; runtime->xruns = 0; spin_unlock_irq(&substream->lock); return 0; } static int snd_rawmidi_ioctl_status32(struct snd_rawmidi_file *rfile, struct snd_rawmidi_status32 __user *argp) { int err = 0; struct snd_rawmidi_status32 __user *status = argp; struct snd_rawmidi_status32 status32; struct snd_rawmidi_status64 status64; if (copy_from_user(&status32, argp, sizeof(struct snd_rawmidi_status32))) return -EFAULT; switch (status32.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status64); break; case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status64); break; default: return -EINVAL; } if (err < 0) return err; status32 = (struct snd_rawmidi_status32) { .stream = status64.stream, .tstamp_sec = status64.tstamp_sec, .tstamp_nsec = status64.tstamp_nsec, .avail = status64.avail, .xruns = status64.xruns, }; if (copy_to_user(status, &status32, sizeof(*status))) return -EFAULT; return 0; } static int snd_rawmidi_ioctl_status64(struct snd_rawmidi_file *rfile, struct snd_rawmidi_status64 __user *argp) { int err = 0; struct snd_rawmidi_status64 status; if (copy_from_user(&status, argp, sizeof(struct snd_rawmidi_status64))) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: return -EINVAL; } if (err < 0) return err; if (copy_to_user(argp, &status, sizeof(struct snd_rawmidi_status64))) return -EFAULT; return 0; } static long snd_rawmidi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_rawmidi_file *rfile; struct snd_rawmidi *rmidi; void __user *argp = (void __user *)arg; rfile = file->private_data; if (((cmd >> 8) & 0xff) != 'W') return -ENOTTY; switch (cmd) { case SNDRV_RAWMIDI_IOCTL_PVERSION: return put_user(SNDRV_RAWMIDI_VERSION, (int __user *)argp) ? -EFAULT : 0; case SNDRV_RAWMIDI_IOCTL_INFO: { int stream; struct snd_rawmidi_info __user *info = argp; if (get_user(stream, &info->stream)) return -EFAULT; switch (stream) { case SNDRV_RAWMIDI_STREAM_INPUT: return snd_rawmidi_info_user(rfile->input, info); case SNDRV_RAWMIDI_STREAM_OUTPUT: return snd_rawmidi_info_user(rfile->output, info); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_USER_PVERSION: if (get_user(rfile->user_pversion, (unsigned int __user *)arg)) return -EFAULT; return 0; case SNDRV_RAWMIDI_IOCTL_PARAMS: { struct snd_rawmidi_params params; if (copy_from_user(¶ms, argp, sizeof(struct snd_rawmidi_params))) return -EFAULT; if (rfile->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 2)) { params.mode = 0; memset(params.reserved, 0, sizeof(params.reserved)); } switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_output_params(rfile->output, ¶ms); case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; return snd_rawmidi_input_params(rfile->input, ¶ms); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_STATUS32: return snd_rawmidi_ioctl_status32(rfile, argp); case SNDRV_RAWMIDI_IOCTL_STATUS64: return snd_rawmidi_ioctl_status64(rfile, argp); case SNDRV_RAWMIDI_IOCTL_DROP: { int val; if (get_user(val, (int __user *) argp)) return -EFAULT; switch (val) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_drop_output(rfile->output); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_DRAIN: { int val; if (get_user(val, (int __user *) argp)) return -EFAULT; switch (val) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_drain_output(rfile->output); case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; return snd_rawmidi_drain_input(rfile->input); default: return -EINVAL; } } default: rmidi = rfile->rmidi; if (rmidi->ops && rmidi->ops->ioctl) return rmidi->ops->ioctl(rmidi, cmd, argp); rmidi_dbg(rmidi, "rawmidi: unknown command = 0x%x\n", cmd); } return -ENOTTY; } /* ioctl to find the next device; either legacy or UMP depending on @find_ump */ static int snd_rawmidi_next_device(struct snd_card *card, int __user *argp, bool find_ump) { struct snd_rawmidi *rmidi; int device; bool is_ump; if (get_user(device, argp)) return -EFAULT; if (device >= SNDRV_RAWMIDI_DEVICES) /* next device is -1 */ device = SNDRV_RAWMIDI_DEVICES - 1; mutex_lock(®ister_mutex); device = device < 0 ? 0 : device + 1; for (; device < SNDRV_RAWMIDI_DEVICES; device++) { rmidi = snd_rawmidi_search(card, device); if (!rmidi) continue; is_ump = rawmidi_is_ump(rmidi); if (find_ump == is_ump) break; } if (device == SNDRV_RAWMIDI_DEVICES) device = -1; mutex_unlock(®ister_mutex); if (put_user(device, argp)) return -EFAULT; return 0; } #if IS_ENABLED(CONFIG_SND_UMP) /* inquiry of UMP endpoint and block info via control API */ static int snd_rawmidi_call_ump_ioctl(struct snd_card *card, int cmd, void __user *argp) { struct snd_ump_endpoint_info __user *info = argp; struct snd_rawmidi *rmidi; int device, ret; if (get_user(device, &info->device)) return -EFAULT; mutex_lock(®ister_mutex); rmidi = snd_rawmidi_search(card, device); if (rmidi && rmidi->ops && rmidi->ops->ioctl) ret = rmidi->ops->ioctl(rmidi, cmd, argp); else ret = -ENXIO; mutex_unlock(®ister_mutex); return ret; } #endif static int snd_rawmidi_control_ioctl(struct snd_card *card, struct snd_ctl_file *control, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; switch (cmd) { case SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE: return snd_rawmidi_next_device(card, argp, false); #if IS_ENABLED(CONFIG_SND_UMP) case SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE: return snd_rawmidi_next_device(card, argp, true); case SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO: return snd_rawmidi_call_ump_ioctl(card, SNDRV_UMP_IOCTL_ENDPOINT_INFO, argp); case SNDRV_CTL_IOCTL_UMP_BLOCK_INFO: return snd_rawmidi_call_ump_ioctl(card, SNDRV_UMP_IOCTL_BLOCK_INFO, argp); #endif case SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE: { int val; if (get_user(val, (int __user *)argp)) return -EFAULT; control->preferred_subdevice[SND_CTL_SUBDEV_RAWMIDI] = val; return 0; } case SNDRV_CTL_IOCTL_RAWMIDI_INFO: return snd_rawmidi_info_select_user(card, argp); } return -ENOIOCTLCMD; } static int receive_with_tstamp_framing(struct snd_rawmidi_substream *substream, const unsigned char *buffer, int src_count, const struct timespec64 *tstamp) { struct snd_rawmidi_runtime *runtime = substream->runtime; struct snd_rawmidi_framing_tstamp *dest_ptr; struct snd_rawmidi_framing_tstamp frame = { .tv_sec = tstamp->tv_sec, .tv_nsec = tstamp->tv_nsec }; int orig_count = src_count; int frame_size = sizeof(struct snd_rawmidi_framing_tstamp); int align = get_align(runtime); BUILD_BUG_ON(frame_size != 0x20); if (snd_BUG_ON((runtime->hw_ptr & 0x1f) != 0)) return -EINVAL; while (src_count > align) { if ((int)(runtime->buffer_size - runtime->avail) < frame_size) { runtime->xruns += src_count; break; } if (src_count >= SNDRV_RAWMIDI_FRAMING_DATA_LENGTH) frame.length = SNDRV_RAWMIDI_FRAMING_DATA_LENGTH; else { frame.length = get_aligned_size(runtime, src_count); if (!frame.length) break; memset(frame.data, 0, SNDRV_RAWMIDI_FRAMING_DATA_LENGTH); } memcpy(frame.data, buffer, frame.length); buffer += frame.length; src_count -= frame.length; dest_ptr = (struct snd_rawmidi_framing_tstamp *) (runtime->buffer + runtime->hw_ptr); *dest_ptr = frame; runtime->avail += frame_size; runtime->hw_ptr += frame_size; runtime->hw_ptr %= runtime->buffer_size; } return orig_count - src_count; } static struct timespec64 get_framing_tstamp(struct snd_rawmidi_substream *substream) { struct timespec64 ts64 = {0, 0}; switch (substream->clock_type) { case SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW: ktime_get_raw_ts64(&ts64); break; case SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC: ktime_get_ts64(&ts64); break; case SNDRV_RAWMIDI_MODE_CLOCK_REALTIME: ktime_get_real_ts64(&ts64); break; } return ts64; } /** * snd_rawmidi_receive - receive the input data from the device * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: the data size to read * * Reads the data from the internal buffer. * * Return: The size of read data, or a negative error code on failure. */ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream, const unsigned char *buffer, int count) { unsigned long flags; struct timespec64 ts64 = get_framing_tstamp(substream); int result = 0, count1; struct snd_rawmidi_runtime *runtime; spin_lock_irqsave(&substream->lock, flags); if (!substream->opened) { result = -EBADFD; goto unlock; } runtime = substream->runtime; if (!runtime || !runtime->buffer) { rmidi_dbg(substream->rmidi, "snd_rawmidi_receive: input is not active!!!\n"); result = -EINVAL; goto unlock; } count = get_aligned_size(runtime, count); if (!count) goto unlock; if (substream->framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) { result = receive_with_tstamp_framing(substream, buffer, count, &ts64); } else if (count == 1) { /* special case, faster code */ substream->bytes++; if (runtime->avail < runtime->buffer_size) { runtime->buffer[runtime->hw_ptr++] = buffer[0]; runtime->hw_ptr %= runtime->buffer_size; runtime->avail++; result++; } else { runtime->xruns++; } } else { substream->bytes += count; count1 = runtime->buffer_size - runtime->hw_ptr; if (count1 > count) count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) count1 = runtime->buffer_size - runtime->avail; count1 = get_aligned_size(runtime, count1); if (!count1) goto unlock; memcpy(runtime->buffer + runtime->hw_ptr, buffer, count1); runtime->hw_ptr += count1; runtime->hw_ptr %= runtime->buffer_size; runtime->avail += count1; count -= count1; result += count1; if (count > 0) { buffer += count1; count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) { count1 = runtime->buffer_size - runtime->avail; runtime->xruns += count - count1; } if (count1 > 0) { memcpy(runtime->buffer, buffer, count1); runtime->hw_ptr = count1; runtime->avail += count1; result += count1; } } } if (result > 0) { if (runtime->event) schedule_work(&runtime->event_work); else if (__snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } unlock: spin_unlock_irqrestore(&substream->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_receive); static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, unsigned char __user *userbuf, unsigned char *kernelbuf, long count) { unsigned long flags; long result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; unsigned long appl_ptr; int err = 0; spin_lock_irqsave(&substream->lock, flags); snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; if (count1 > (int)runtime->avail) count1 = runtime->avail; /* update runtime->appl_ptr before unlocking for userbuf */ appl_ptr = runtime->appl_ptr; runtime->appl_ptr += count1; runtime->appl_ptr %= runtime->buffer_size; runtime->avail -= count1; if (kernelbuf) memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1); if (userbuf) { spin_unlock_irqrestore(&substream->lock, flags); if (copy_to_user(userbuf + result, runtime->buffer + appl_ptr, count1)) err = -EFAULT; spin_lock_irqsave(&substream->lock, flags); if (err) goto out; } result += count1; count -= count1; } out: snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&substream->lock, flags); return result > 0 ? result : err; } long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream, unsigned char *buf, long count) { snd_rawmidi_input_trigger(substream, 1); return snd_rawmidi_kernel_read1(substream, NULL/*userbuf*/, buf, count); } EXPORT_SYMBOL(snd_rawmidi_kernel_read); static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { long result; int count1; struct snd_rawmidi_file *rfile; struct snd_rawmidi_substream *substream; struct snd_rawmidi_runtime *runtime; rfile = file->private_data; substream = rfile->input; if (substream == NULL) return -EIO; runtime = substream->runtime; snd_rawmidi_input_trigger(substream, 1); result = 0; while (count > 0) { spin_lock_irq(&substream->lock); while (!__snd_rawmidi_ready(runtime)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EAGAIN; } init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); schedule(); remove_wait_queue(&runtime->sleep, &wait); if (rfile->rmidi->card->shutdown) return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; spin_lock_irq(&substream->lock); if (!runtime->avail) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EIO; } } spin_unlock_irq(&substream->lock); count1 = snd_rawmidi_kernel_read1(substream, (unsigned char __user *)buf, NULL/*kernelbuf*/, count); if (count1 < 0) return result > 0 ? result : count1; result += count1; buf += count1; count -= count1; } return result; } /** * snd_rawmidi_transmit_empty - check whether the output buffer is empty * @substream: the rawmidi substream * * Return: 1 if the internal output buffer is empty, 0 if not. */ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; int result; unsigned long flags; spin_lock_irqsave(&substream->lock, flags); runtime = substream->runtime; if (!substream->opened || !runtime || !runtime->buffer) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_empty: output is not active!!!\n"); result = 1; } else { result = runtime->avail >= runtime->buffer_size; } spin_unlock_irqrestore(&substream->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_empty); /* * __snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * * This is a variant of snd_rawmidi_transmit_peek() without spinlock. */ static int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { int result, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_peek: output is not active!!!\n"); return -EINVAL; } result = 0; if (runtime->avail >= runtime->buffer_size) { /* warning: lowlevel layer MUST trigger down the hardware */ goto __skip; } if (count == 1) { /* special case, faster code */ *buffer = runtime->buffer[runtime->hw_ptr]; result++; } else { count1 = runtime->buffer_size - runtime->hw_ptr; if (count1 > count) count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) count1 = runtime->buffer_size - runtime->avail; count1 = get_aligned_size(runtime, count1); if (!count1) goto __skip; memcpy(buffer, runtime->buffer + runtime->hw_ptr, count1); count -= count1; result += count1; if (count > 0) { if (count > (int)(runtime->buffer_size - runtime->avail - count1)) count = runtime->buffer_size - runtime->avail - count1; count = get_aligned_size(runtime, count); if (!count) goto __skip; memcpy(buffer + count1, runtime->buffer, count); result += count; } } __skip: return result; } /** * snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * * Copies data from the internal output buffer to the given buffer. * * Call this in the interrupt handler when the midi output is ready, * and call snd_rawmidi_transmit_ack() after the transmission is * finished. * * Return: The size of copied data, or a negative error code on failure. */ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { int result; unsigned long flags; spin_lock_irqsave(&substream->lock, flags); if (!substream->opened || !substream->runtime) result = -EBADFD; else result = __snd_rawmidi_transmit_peek(substream, buffer, count); spin_unlock_irqrestore(&substream->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_peek); /* * __snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * * This is a variant of __snd_rawmidi_transmit_ack() without spinlock. */ static int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_ack: output is not active!!!\n"); return -EINVAL; } snd_BUG_ON(runtime->avail + count > runtime->buffer_size); count = get_aligned_size(runtime, count); runtime->hw_ptr += count; runtime->hw_ptr %= runtime->buffer_size; runtime->avail += count; substream->bytes += count; if (count > 0) { if (runtime->drain || __snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } return count; } /** * snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * * Advances the hardware pointer for the internal output buffer with * the given size and updates the condition. * Call after the transmission is finished. * * Return: The advanced size if successful, or a negative error code on failure. */ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { int result; unsigned long flags; spin_lock_irqsave(&substream->lock, flags); if (!substream->opened || !substream->runtime) result = -EBADFD; else result = __snd_rawmidi_transmit_ack(substream, count); spin_unlock_irqrestore(&substream->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_ack); /** * snd_rawmidi_transmit - copy from the buffer to the device * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: the data size to transfer * * Copies data from the buffer to the device and advances the pointer. * * Return: The copied size if successful, or a negative error code on failure. */ int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { int result; unsigned long flags; spin_lock_irqsave(&substream->lock, flags); if (!substream->opened) result = -EBADFD; else { count = __snd_rawmidi_transmit_peek(substream, buffer, count); if (count <= 0) result = count; else result = __snd_rawmidi_transmit_ack(substream, count); } spin_unlock_irqrestore(&substream->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit); /** * snd_rawmidi_proceed - Discard the all pending bytes and proceed * @substream: rawmidi substream * * Return: the number of discarded bytes */ int snd_rawmidi_proceed(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; unsigned long flags; int count = 0; spin_lock_irqsave(&substream->lock, flags); runtime = substream->runtime; if (substream->opened && runtime && runtime->avail < runtime->buffer_size) { count = runtime->buffer_size - runtime->avail; __snd_rawmidi_transmit_ack(substream, count); } spin_unlock_irqrestore(&substream->lock, flags); return count; } EXPORT_SYMBOL(snd_rawmidi_proceed); static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, const unsigned char __user *userbuf, const unsigned char *kernelbuf, long count) { unsigned long flags; long count1, result; struct snd_rawmidi_runtime *runtime = substream->runtime; unsigned long appl_ptr; if (!kernelbuf && !userbuf) return -EINVAL; if (snd_BUG_ON(!runtime->buffer)) return -EINVAL; result = 0; spin_lock_irqsave(&substream->lock, flags); if (substream->append) { if ((long)runtime->avail < count) { spin_unlock_irqrestore(&substream->lock, flags); return -EAGAIN; } } snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail > 0) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; if (count1 > (long)runtime->avail) count1 = runtime->avail; /* update runtime->appl_ptr before unlocking for userbuf */ appl_ptr = runtime->appl_ptr; runtime->appl_ptr += count1; runtime->appl_ptr %= runtime->buffer_size; runtime->avail -= count1; if (kernelbuf) memcpy(runtime->buffer + appl_ptr, kernelbuf + result, count1); else if (userbuf) { spin_unlock_irqrestore(&substream->lock, flags); if (copy_from_user(runtime->buffer + appl_ptr, userbuf + result, count1)) { spin_lock_irqsave(&substream->lock, flags); result = result > 0 ? result : -EFAULT; goto __end; } spin_lock_irqsave(&substream->lock, flags); } result += count1; count -= count1; } __end: count1 = runtime->avail < runtime->buffer_size; snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&substream->lock, flags); if (count1) snd_rawmidi_output_trigger(substream, 1); return result; } long snd_rawmidi_kernel_write(struct snd_rawmidi_substream *substream, const unsigned char *buf, long count) { return snd_rawmidi_kernel_write1(substream, NULL, buf, count); } EXPORT_SYMBOL(snd_rawmidi_kernel_write); static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { long result, timeout; int count1; struct snd_rawmidi_file *rfile; struct snd_rawmidi_runtime *runtime; struct snd_rawmidi_substream *substream; rfile = file->private_data; substream = rfile->output; runtime = substream->runtime; /* we cannot put an atomic message to our buffer */ if (substream->append && count > runtime->buffer_size) return -EIO; result = 0; while (count > 0) { spin_lock_irq(&substream->lock); while (!snd_rawmidi_ready_append(substream, count)) { wait_queue_entry_t wait; if (file->f_flags & O_NONBLOCK) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EAGAIN; } init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); timeout = schedule_timeout(30 * HZ); remove_wait_queue(&runtime->sleep, &wait); if (rfile->rmidi->card->shutdown) return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; spin_lock_irq(&substream->lock); if (!runtime->avail && !timeout) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EIO; } } spin_unlock_irq(&substream->lock); count1 = snd_rawmidi_kernel_write1(substream, buf, NULL, count); if (count1 < 0) return result > 0 ? result : count1; result += count1; buf += count1; if ((size_t)count1 < count && (file->f_flags & O_NONBLOCK)) break; count -= count1; } if (file->f_flags & O_DSYNC) { spin_lock_irq(&substream->lock); while (runtime->avail != runtime->buffer_size) { wait_queue_entry_t wait; unsigned int last_avail = runtime->avail; init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); timeout = schedule_timeout(30 * HZ); remove_wait_queue(&runtime->sleep, &wait); if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; if (runtime->avail == last_avail && !timeout) return result > 0 ? result : -EIO; spin_lock_irq(&substream->lock); } spin_unlock_irq(&substream->lock); } return result; } static __poll_t snd_rawmidi_poll(struct file *file, poll_table *wait) { struct snd_rawmidi_file *rfile; struct snd_rawmidi_runtime *runtime; __poll_t mask; rfile = file->private_data; if (rfile->input != NULL) { runtime = rfile->input->runtime; snd_rawmidi_input_trigger(rfile->input, 1); poll_wait(file, &runtime->sleep, wait); } if (rfile->output != NULL) { runtime = rfile->output->runtime; poll_wait(file, &runtime->sleep, wait); } mask = 0; if (rfile->input != NULL) { if (snd_rawmidi_ready(rfile->input)) mask |= EPOLLIN | EPOLLRDNORM; } if (rfile->output != NULL) { if (snd_rawmidi_ready(rfile->output)) mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } /* */ #ifdef CONFIG_COMPAT #include "rawmidi_compat.c" #else #define snd_rawmidi_ioctl_compat NULL #endif /* */ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *substream; struct snd_rawmidi_runtime *runtime; unsigned long buffer_size, avail, xruns; unsigned int clock_type; static const char *clock_names[4] = { "none", "realtime", "monotonic", "monotonic raw" }; rmidi = entry->private_data; snd_iprintf(buffer, "%s\n\n", rmidi->name); if (IS_ENABLED(CONFIG_SND_UMP)) snd_iprintf(buffer, "Type: %s\n", rawmidi_is_ump(rmidi) ? "UMP" : "Legacy"); if (rmidi->ops && rmidi->ops->proc_read) rmidi->ops->proc_read(entry, buffer); mutex_lock(&rmidi->open_mutex); if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_OUTPUT) { list_for_each_entry(substream, &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams, list) { snd_iprintf(buffer, "Output %d\n" " Tx bytes : %lu\n", substream->number, (unsigned long) substream->bytes); if (substream->opened) { snd_iprintf(buffer, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; spin_lock_irq(&substream->lock); buffer_size = runtime->buffer_size; avail = runtime->avail; spin_unlock_irq(&substream->lock); snd_iprintf(buffer, " Mode : %s\n" " Buffer size : %lu\n" " Avail : %lu\n", runtime->oss ? "OSS compatible" : "native", buffer_size, avail); } } } if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_INPUT) { list_for_each_entry(substream, &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams, list) { snd_iprintf(buffer, "Input %d\n" " Rx bytes : %lu\n", substream->number, (unsigned long) substream->bytes); if (substream->opened) { snd_iprintf(buffer, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; spin_lock_irq(&substream->lock); buffer_size = runtime->buffer_size; avail = runtime->avail; xruns = runtime->xruns; spin_unlock_irq(&substream->lock); snd_iprintf(buffer, " Buffer size : %lu\n" " Avail : %lu\n" " Overruns : %lu\n", buffer_size, avail, xruns); if (substream->framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) { clock_type = substream->clock_type >> SNDRV_RAWMIDI_MODE_CLOCK_SHIFT; if (!snd_BUG_ON(clock_type >= ARRAY_SIZE(clock_names))) snd_iprintf(buffer, " Framing : tstamp\n" " Clock type : %s\n", clock_names[clock_type]); } } } } mutex_unlock(&rmidi->open_mutex); } /* * Register functions */ static const struct file_operations snd_rawmidi_f_ops = { .owner = THIS_MODULE, .read = snd_rawmidi_read, .write = snd_rawmidi_write, .open = snd_rawmidi_open, .release = snd_rawmidi_release, .llseek = no_llseek, .poll = snd_rawmidi_poll, .unlocked_ioctl = snd_rawmidi_ioctl, .compat_ioctl = snd_rawmidi_ioctl_compat, }; static int snd_rawmidi_alloc_substreams(struct snd_rawmidi *rmidi, struct snd_rawmidi_str *stream, int direction, int count) { struct snd_rawmidi_substream *substream; int idx; for (idx = 0; idx < count; idx++) { substream = kzalloc(sizeof(*substream), GFP_KERNEL); if (!substream) return -ENOMEM; substream->stream = direction; substream->number = idx; substream->rmidi = rmidi; substream->pstr = stream; spin_lock_init(&substream->lock); list_add_tail(&substream->list, &stream->substreams); stream->substream_count++; } return 0; } /* used for both rawmidi and ump */ int snd_rawmidi_init(struct snd_rawmidi *rmidi, struct snd_card *card, char *id, int device, int output_count, int input_count, unsigned int info_flags) { int err; static const struct snd_device_ops ops = { .dev_free = snd_rawmidi_dev_free, .dev_register = snd_rawmidi_dev_register, .dev_disconnect = snd_rawmidi_dev_disconnect, }; rmidi->card = card; rmidi->device = device; mutex_init(&rmidi->open_mutex); init_waitqueue_head(&rmidi->open_wait); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams); rmidi->info_flags = info_flags; if (id != NULL) strscpy(rmidi->id, id, sizeof(rmidi->id)); err = snd_device_alloc(&rmidi->dev, card); if (err < 0) return err; if (rawmidi_is_ump(rmidi)) dev_set_name(rmidi->dev, "umpC%iD%i", card->number, device); else dev_set_name(rmidi->dev, "midiC%iD%i", card->number, device); err = snd_rawmidi_alloc_substreams(rmidi, &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT], SNDRV_RAWMIDI_STREAM_INPUT, input_count); if (err < 0) return err; err = snd_rawmidi_alloc_substreams(rmidi, &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT], SNDRV_RAWMIDI_STREAM_OUTPUT, output_count); if (err < 0) return err; err = snd_device_new(card, SNDRV_DEV_RAWMIDI, rmidi, &ops); if (err < 0) return err; return 0; } EXPORT_SYMBOL_GPL(snd_rawmidi_init); /** * snd_rawmidi_new - create a rawmidi instance * @card: the card instance * @id: the id string * @device: the device index * @output_count: the number of output streams * @input_count: the number of input streams * @rrawmidi: the pointer to store the new rawmidi instance * * Creates a new rawmidi instance. * Use snd_rawmidi_set_ops() to set the operators to the new instance. * * Return: Zero if successful, or a negative error code on failure. */ int snd_rawmidi_new(struct snd_card *card, char *id, int device, int output_count, int input_count, struct snd_rawmidi **rrawmidi) { struct snd_rawmidi *rmidi; int err; if (rrawmidi) *rrawmidi = NULL; rmidi = kzalloc(sizeof(*rmidi), GFP_KERNEL); if (!rmidi) return -ENOMEM; err = snd_rawmidi_init(rmidi, card, id, device, output_count, input_count, 0); if (err < 0) { snd_rawmidi_free(rmidi); return err; } if (rrawmidi) *rrawmidi = rmidi; return 0; } EXPORT_SYMBOL(snd_rawmidi_new); static void snd_rawmidi_free_substreams(struct snd_rawmidi_str *stream) { struct snd_rawmidi_substream *substream; while (!list_empty(&stream->substreams)) { substream = list_entry(stream->substreams.next, struct snd_rawmidi_substream, list); list_del(&substream->list); kfree(substream); } } /* called from ump.c, too */ int snd_rawmidi_free(struct snd_rawmidi *rmidi) { if (!rmidi) return 0; snd_info_free_entry(rmidi->proc_entry); rmidi->proc_entry = NULL; if (rmidi->ops && rmidi->ops->dev_unregister) rmidi->ops->dev_unregister(rmidi); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]); if (rmidi->private_free) rmidi->private_free(rmidi); put_device(rmidi->dev); kfree(rmidi); return 0; } EXPORT_SYMBOL_GPL(snd_rawmidi_free); static int snd_rawmidi_dev_free(struct snd_device *device) { struct snd_rawmidi *rmidi = device->device_data; return snd_rawmidi_free(rmidi); } #if IS_ENABLED(CONFIG_SND_SEQUENCER) static void snd_rawmidi_dev_seq_free(struct snd_seq_device *device) { struct snd_rawmidi *rmidi = device->private_data; rmidi->seq_dev = NULL; } #endif static int snd_rawmidi_dev_register(struct snd_device *device) { int err; struct snd_info_entry *entry; char name[16]; struct snd_rawmidi *rmidi = device->device_data; if (rmidi->device >= SNDRV_RAWMIDI_DEVICES) return -ENOMEM; err = 0; mutex_lock(®ister_mutex); if (snd_rawmidi_search(rmidi->card, rmidi->device)) err = -EBUSY; else list_add_tail(&rmidi->list, &snd_rawmidi_devices); mutex_unlock(®ister_mutex); if (err < 0) return err; err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device, &snd_rawmidi_f_ops, rmidi, rmidi->dev); if (err < 0) { rmidi_err(rmidi, "unable to register\n"); goto error; } if (rmidi->ops && rmidi->ops->dev_register) { err = rmidi->ops->dev_register(rmidi); if (err < 0) goto error_unregister; } #ifdef CONFIG_SND_OSSEMUL rmidi->ossreg = 0; if (!rawmidi_is_ump(rmidi) && (int)rmidi->device == midi_map[rmidi->card->number]) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 0, &snd_rawmidi_f_ops, rmidi) < 0) { rmidi_err(rmidi, "unable to register OSS rawmidi device %i:%i\n", rmidi->card->number, 0); } else { rmidi->ossreg++; #ifdef SNDRV_OSS_INFO_DEV_MIDI snd_oss_info_register(SNDRV_OSS_INFO_DEV_MIDI, rmidi->card->number, rmidi->name); #endif } } if (!rawmidi_is_ump(rmidi) && (int)rmidi->device == amidi_map[rmidi->card->number]) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 1, &snd_rawmidi_f_ops, rmidi) < 0) { rmidi_err(rmidi, "unable to register OSS rawmidi device %i:%i\n", rmidi->card->number, 1); } else { rmidi->ossreg++; } } #endif /* CONFIG_SND_OSSEMUL */ sprintf(name, "midi%d", rmidi->device); entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root); if (entry) { entry->private_data = rmidi; entry->c.text.read = snd_rawmidi_proc_info_read; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } rmidi->proc_entry = entry; #if IS_ENABLED(CONFIG_SND_SEQUENCER) /* no own registration mechanism? */ if (!rmidi->ops || !rmidi->ops->dev_register) { if (snd_seq_device_new(rmidi->card, rmidi->device, SNDRV_SEQ_DEV_ID_MIDISYNTH, 0, &rmidi->seq_dev) >= 0) { rmidi->seq_dev->private_data = rmidi; rmidi->seq_dev->private_free = snd_rawmidi_dev_seq_free; sprintf(rmidi->seq_dev->name, "MIDI %d-%d", rmidi->card->number, rmidi->device); snd_device_register(rmidi->card, rmidi->seq_dev); } } #endif return 0; error_unregister: snd_unregister_device(rmidi->dev); error: mutex_lock(®ister_mutex); list_del(&rmidi->list); mutex_unlock(®ister_mutex); return err; } static int snd_rawmidi_dev_disconnect(struct snd_device *device) { struct snd_rawmidi *rmidi = device->device_data; int dir; mutex_lock(®ister_mutex); mutex_lock(&rmidi->open_mutex); wake_up(&rmidi->open_wait); list_del_init(&rmidi->list); for (dir = 0; dir < 2; dir++) { struct snd_rawmidi_substream *s; list_for_each_entry(s, &rmidi->streams[dir].substreams, list) { if (s->runtime) wake_up(&s->runtime->sleep); } } #ifdef CONFIG_SND_OSSEMUL if (rmidi->ossreg) { if ((int)rmidi->device == midi_map[rmidi->card->number]) { snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 0); #ifdef SNDRV_OSS_INFO_DEV_MIDI snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIDI, rmidi->card->number); #endif } if ((int)rmidi->device == amidi_map[rmidi->card->number]) snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 1); rmidi->ossreg = 0; } #endif /* CONFIG_SND_OSSEMUL */ snd_unregister_device(rmidi->dev); mutex_unlock(&rmidi->open_mutex); mutex_unlock(®ister_mutex); return 0; } /** * snd_rawmidi_set_ops - set the rawmidi operators * @rmidi: the rawmidi instance * @stream: the stream direction, SNDRV_RAWMIDI_STREAM_XXX * @ops: the operator table * * Sets the rawmidi operators for the given stream direction. */ void snd_rawmidi_set_ops(struct snd_rawmidi *rmidi, int stream, const struct snd_rawmidi_ops *ops) { struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &rmidi->streams[stream].substreams, list) substream->ops = ops; } EXPORT_SYMBOL(snd_rawmidi_set_ops); /* * ENTRY functions */ static int __init alsa_rawmidi_init(void) { snd_ctl_register_ioctl(snd_rawmidi_control_ioctl); snd_ctl_register_ioctl_compat(snd_rawmidi_control_ioctl); #ifdef CONFIG_SND_OSSEMUL { int i; /* check device map table */ for (i = 0; i < SNDRV_CARDS; i++) { if (midi_map[i] < 0 || midi_map[i] >= SNDRV_RAWMIDI_DEVICES) { pr_err("ALSA: rawmidi: invalid midi_map[%d] = %d\n", i, midi_map[i]); midi_map[i] = 0; } if (amidi_map[i] < 0 || amidi_map[i] >= SNDRV_RAWMIDI_DEVICES) { pr_err("ALSA: rawmidi: invalid amidi_map[%d] = %d\n", i, amidi_map[i]); amidi_map[i] = 1; } } } #endif /* CONFIG_SND_OSSEMUL */ return 0; } static void __exit alsa_rawmidi_exit(void) { snd_ctl_unregister_ioctl(snd_rawmidi_control_ioctl); snd_ctl_unregister_ioctl_compat(snd_rawmidi_control_ioctl); } module_init(alsa_rawmidi_init) module_exit(alsa_rawmidi_exit) |
18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM nfs #if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NFS_H #include <linux/tracepoint.h> #include <linux/iversion.h> #include <trace/misc/fs.h> #include <trace/misc/nfs.h> #include <trace/misc/sunrpc.h> #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ { NFS_INO_INVALID_MODE, "INVALID_MODE" }) #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ { BIT(NFS_INO_STALE), "STALE" }, \ { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ { BIT(NFS_INO_ODIRECT), "ODIRECT" }) DECLARE_EVENT_CLASS(nfs_inode_event, TP_PROTO( const struct inode *inode ), TP_ARGS(inode), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (unsigned long long)__entry->version ) ); DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_PROTO( const struct inode *inode, int error ), TP_ARGS(inode, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) __field(u64, fileid) __field(u64, version) __field(loff_t, size) __field(unsigned long, nfsi_flags) __field(unsigned long, cache_validity) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, nfs_show_cache_validity(__entry->cache_validity), __entry->nfsi_flags, nfs_show_nfsi_flags(__entry->nfsi_flags) ) ); #define DEFINE_NFS_INODE_EVENT(name) \ DEFINE_EVENT(nfs_inode_event, name, \ TP_PROTO( \ const struct inode *inode \ ), \ TP_ARGS(inode)) #define DEFINE_NFS_INODE_EVENT_DONE(name) \ DEFINE_EVENT(nfs_inode_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ int error \ ), \ TP_ARGS(inode, error)) DEFINE_NFS_INODE_EVENT(nfs_set_inode_stale); DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit); DEFINE_NFS_INODE_EVENT(nfs_getattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit); DEFINE_NFS_INODE_EVENT(nfs_setattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit); DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid); DEFINE_NFS_INODE_EVENT(nfs_readdir_force_readdirplus); DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_cache_fill_done); DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_uncached_done); TRACE_EVENT(nfs_access_exit, TP_PROTO( const struct inode *inode, unsigned int mask, unsigned int permitted, int error ), TP_ARGS(inode, mask, permitted, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) __field(u64, fileid) __field(u64, version) __field(loff_t, size) __field(unsigned long, nfsi_flags) __field(unsigned long, cache_validity) __field(unsigned int, mask) __field(unsigned int, permitted) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; __entry->mask = mask; __entry->permitted = permitted; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " "mask=0x%x permitted=0x%x", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, nfs_show_cache_validity(__entry->cache_validity), __entry->nfsi_flags, nfs_show_nfsi_flags(__entry->nfsi_flags), __entry->mask, __entry->permitted ) ); DECLARE_EVENT_CLASS(nfs_update_size_class, TP_PROTO( const struct inode *inode, loff_t new_size ), TP_ARGS(inode, new_size), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, cur_size) __field(loff_t, new_size) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->fileid = nfsi->fileid; __entry->version = inode_peek_iversion_raw(inode); __entry->cur_size = i_size_read(inode); __entry->new_size = new_size; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cursize=%lld newsize=%lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->cur_size, __entry->new_size ) ); #define DEFINE_NFS_UPDATE_SIZE_EVENT(name) \ DEFINE_EVENT(nfs_update_size_class, nfs_size_##name, \ TP_PROTO( \ const struct inode *inode, \ loff_t new_size \ ), \ TP_ARGS(inode, new_size)) DEFINE_NFS_UPDATE_SIZE_EVENT(truncate); DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); DEFINE_NFS_UPDATE_SIZE_EVENT(update); DEFINE_NFS_UPDATE_SIZE_EVENT(grow); DECLARE_EVENT_CLASS(nfs_inode_range_event, TP_PROTO( const struct inode *inode, loff_t range_start, loff_t range_end ), TP_ARGS(inode, range_start, range_end), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, range_start) __field(loff_t, range_end) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->fileid = nfsi->fileid; __entry->version = inode_peek_iversion_raw(inode); __entry->range_start = range_start; __entry->range_end = range_end; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "range=[%lld, %lld]", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->range_start, __entry->range_end ) ); #define DEFINE_NFS_INODE_RANGE_EVENT(name) \ DEFINE_EVENT(nfs_inode_range_event, name, \ TP_PROTO( \ const struct inode *inode, \ loff_t range_start, \ loff_t range_end \ ), \ TP_ARGS(inode, range_start, range_end)) DEFINE_NFS_INODE_RANGE_EVENT(nfs_readdir_invalidate_cache_range); DECLARE_EVENT_CLASS(nfs_readdir_event, TP_PROTO( const struct file *file, const __be32 *verifier, u64 cookie, pgoff_t page_index, unsigned int dtsize ), TP_ARGS(file, verifier, cookie, page_index, dtsize), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __array(char, verifier, NFS4_VERIFIER_SIZE) __field(u64, cookie) __field(pgoff_t, index) __field(unsigned int, dtsize) ), TP_fast_assign( const struct inode *dir = file_inode(file); const struct nfs_inode *nfsi = NFS_I(dir); __entry->dev = dir->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(dir); if (cookie != 0) memcpy(__entry->verifier, verifier, NFS4_VERIFIER_SIZE); else memset(__entry->verifier, 0, NFS4_VERIFIER_SIZE); __entry->cookie = cookie; __entry->index = page_index; __entry->dtsize = dtsize; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "cookie=%s:0x%llx cache_index=%lu dtsize=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, show_nfs4_verifier(__entry->verifier), (unsigned long long)__entry->cookie, __entry->index, __entry->dtsize ) ); #define DEFINE_NFS_READDIR_EVENT(name) \ DEFINE_EVENT(nfs_readdir_event, name, \ TP_PROTO( \ const struct file *file, \ const __be32 *verifier, \ u64 cookie, \ pgoff_t page_index, \ unsigned int dtsize \ ), \ TP_ARGS(file, verifier, cookie, page_index, dtsize)) DEFINE_NFS_READDIR_EVENT(nfs_readdir_cache_fill); DEFINE_NFS_READDIR_EVENT(nfs_readdir_uncached); DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags ), TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __field(u64, fileid) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name), __entry->fileid ) ); #define DEFINE_NFS_LOOKUP_EVENT(name) \ DEFINE_EVENT(nfs_lookup_event, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ unsigned int flags \ ), \ TP_ARGS(dir, dentry, flags)) DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags, int error ), TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __field(u64, fileid) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name), __entry->fileid ) ); #define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \ DEFINE_EVENT(nfs_lookup_event_done, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ unsigned int flags, \ int error \ ), \ TP_ARGS(dir, dentry, flags, error)) DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup); DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup_revalidate_failed); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_readdir_lookup_revalidate); TRACE_EVENT(nfs_atomic_open_enter, TP_PROTO( const struct inode *dir, const struct nfs_open_context *ctx, unsigned int flags ), TP_ARGS(dir, ctx, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_atomic_open_exit, TP_PROTO( const struct inode *dir, const struct nfs_open_context *ctx, unsigned int flags, int error ), TP_ARGS(dir, ctx, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) ), TP_fast_assign( __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_create_enter, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags ), TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __assign_str(name, dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_create_exit, TP_PROTO( const struct inode *dir, const struct dentry *dentry, unsigned int flags, int error ), TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __assign_str(name, dentry->d_name.name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_directory_event, TP_PROTO( const struct inode *dir, const struct dentry *dentry ), TP_ARGS(dir, dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __assign_str(name, dentry->d_name.name); ), TP_printk( "name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); #define DEFINE_NFS_DIRECTORY_EVENT(name) \ DEFINE_EVENT(nfs_directory_event, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry \ ), \ TP_ARGS(dir, dentry)) DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_PROTO( const struct inode *dir, const struct dentry *dentry, int error ), TP_ARGS(dir, dentry, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); #define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \ DEFINE_EVENT(nfs_directory_event_done, name, \ TP_PROTO( \ const struct inode *dir, \ const struct dentry *dentry, \ int error \ ), \ TP_ARGS(dir, dentry, error)) DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); TRACE_EVENT(nfs_link_enter, TP_PROTO( const struct inode *inode, const struct inode *dir, const struct dentry *dentry ), TP_ARGS(inode, dir, dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __assign_str(name, dentry->d_name.name); ), TP_printk( "fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); TRACE_EVENT(nfs_link_exit, TP_PROTO( const struct inode *inode, const struct inode *dir, const struct dentry *dentry, int error ), TP_ARGS(inode, dir, dentry, error), TP_STRUCT__entry( __field(unsigned long, error) __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_rename_event, TP_PROTO( const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, const struct dentry *new_dentry ), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, old_dir) __field(u64, new_dir) __string(old_name, old_dentry->d_name.name) __string(new_name, new_dentry->d_name.name) ), TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); __assign_str(old_name, old_dentry->d_name.name); __assign_str(new_name, new_dentry->d_name.name); ), TP_printk( "old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->new_dir, __get_str(new_name) ) ); #define DEFINE_NFS_RENAME_EVENT(name) \ DEFINE_EVENT(nfs_rename_event, name, \ TP_PROTO( \ const struct inode *old_dir, \ const struct dentry *old_dentry, \ const struct inode *new_dir, \ const struct dentry *new_dentry \ ), \ TP_ARGS(old_dir, old_dentry, new_dir, new_dentry)) DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_PROTO( const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, const struct dentry *new_dentry, int error ), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, error) __field(u64, old_dir) __string(old_name, old_dentry->d_name.name) __field(u64, new_dir) __string(new_name, new_dentry->d_name.name) ), TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); __assign_str(old_name, old_dentry->d_name.name); __assign_str(new_name, new_dentry->d_name.name); ), TP_printk( "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->new_dir, __get_str(new_name) ) ); #define DEFINE_NFS_RENAME_EVENT_DONE(name) \ DEFINE_EVENT(nfs_rename_event_done, name, \ TP_PROTO( \ const struct inode *old_dir, \ const struct dentry *old_dentry, \ const struct inode *new_dir, \ const struct dentry *new_dentry, \ int error \ ), \ TP_ARGS(old_dir, old_dentry, new_dir, \ new_dentry, error)) DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done); TRACE_EVENT(nfs_sillyrename_unlink, TP_PROTO( const struct nfs_unlinkdata *data, int error ), TP_ARGS(data, error), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, error) __field(u64, dir) __dynamic_array(char, name, data->args.name.len + 1) ), TP_fast_assign( struct inode *dir = d_inode(data->dentry->d_parent); size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = -error; memcpy(__get_str(name), data->args.name.name, len); __get_str(name)[len] = 0; ), TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, struct folio *folio ), TP_ARGS(inode, folio), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = folio_file_pos(folio); __entry->count = nfs_folio_length(folio); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->count ) ); #define DEFINE_NFS_FOLIO_EVENT(name) \ DEFINE_EVENT(nfs_folio_event, name, \ TP_PROTO( \ const struct inode *inode, \ struct folio *folio \ ), \ TP_ARGS(inode, folio)) DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, struct folio *folio, int ret ), TP_ARGS(inode, folio, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(int, ret) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = folio_file_pos(folio); __entry->count = nfs_folio_length(folio); __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " "offset=%lld count=%u ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->count, __entry->ret ) ); #define DEFINE_NFS_FOLIO_EVENT_DONE(name) \ DEFINE_EVENT(nfs_folio_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ struct folio *folio, \ int ret \ ), \ TP_ARGS(inode, folio, ret)) DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done); DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done); TRACE_EVENT(nfs_aop_readahead, TP_PROTO( const struct inode *inode, loff_t pos, unsigned int nr_pages ), TP_ARGS(inode, pos, nr_pages), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(unsigned int, nr_pages) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->offset = pos; __entry->nr_pages = nr_pages; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld nr_pages=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->offset, __entry->nr_pages ) ); TRACE_EVENT(nfs_aop_readahead_done, TP_PROTO( const struct inode *inode, unsigned int nr_pages, int ret ), TP_ARGS(inode, nr_pages, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(int, ret) __field(u64, fileid) __field(u64, version) __field(loff_t, offset) __field(unsigned int, nr_pages) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); __entry->nr_pages = nr_pages; __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, __entry->nr_pages, __entry->ret ) ); TRACE_EVENT(nfs_initiate_read, TP_PROTO( const struct nfs_pgio_header *hdr ), TP_ARGS(hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_readpage_done, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->eof ? " eof" : "" ) ); TRACE_EVENT(nfs_readpage_short, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(bool, eof) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->eof ? " eof" : "" ) ); TRACE_EVENT(nfs_pgio_error, TP_PROTO( const struct nfs_pgio_header *hdr, int error, loff_t pos ), TP_ARGS(hdr, error, pos), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(loff_t, pos) __field(int, error) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->error = error; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk("error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u pos=%llu", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->pos ) ); TRACE_EVENT(nfs_initiate_write, TP_PROTO( const struct nfs_pgio_header *hdr ), TP_ARGS(hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) __field(unsigned long, stable) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; __entry->stable = hdr->args.stable; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u stable=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count, show_nfs_stable_how(__entry->stable) ) ); TRACE_EVENT(nfs_writeback_done, TP_PROTO( const struct rpc_task *task, const struct nfs_pgio_header *hdr ), TP_ARGS(task, hdr), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, arg_count) __field(u32, res_count) __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = hdr->args.fh ? hdr->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = hdr->res.verf; __entry->error = task->tk_status; __entry->offset = hdr->args.offset; __entry->arg_count = hdr->args.count; __entry->res_count = hdr->res.count; __entry->stable = verf->committed; memcpy(__entry->verifier, &verf->verifier, NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u res=%u stable=%s " "verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) ); DECLARE_EVENT_CLASS(nfs_page_error_class, TP_PROTO( const struct inode *inode, const struct nfs_page *req, int error ), TP_ARGS(inode, req, error), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(unsigned int, count) __field(int, error) ), TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->offset = req_offset(req); __entry->count = req->wb_bytes; __entry->error = error; ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count ) ); #define DEFINE_NFS_PAGEERR_EVENT(name) \ DEFINE_EVENT(nfs_page_error_class, name, \ TP_PROTO( \ const struct inode *inode, \ const struct nfs_page *req, \ int error \ ), \ TP_ARGS(inode, req, error)) DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error); TRACE_EVENT(nfs_initiate_commit, TP_PROTO( const struct nfs_commit_data *data ), TP_ARGS(data), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = data->args.fh ? data->args.fh : &nfsi->fh; __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_commit_done, TP_PROTO( const struct rpc_task *task, const struct nfs_commit_data *data ), TP_ARGS(task, data), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) __field(int, error) __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = data->args.fh ? data->args.fh : &nfsi->fh; const struct nfs_writeverf *verf = data->res.verf; __entry->error = task->tk_status; __entry->offset = data->args.offset; __entry->stable = verf->committed; memcpy(__entry->verifier, &verf->verifier, NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld stable=%s verifier=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, show_nfs_stable_how(__entry->stable), show_nfs4_verifier(__entry->verifier) ) ); #define nfs_show_direct_req_flags(v) \ __print_flags(v, "|", \ { NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \ { NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \ { NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \ { NFS_ODIRECT_DONE, "DONE" } ) DECLARE_EVENT_CLASS(nfs_direct_req_class, TP_PROTO( const struct nfs_direct_req *dreq ), TP_ARGS(dreq), TP_STRUCT__entry( __field(dev_t, dev) __field(u64, fileid) __field(u32, fhandle) __field(loff_t, offset) __field(ssize_t, count) __field(ssize_t, error) __field(int, flags) ), TP_fast_assign( const struct inode *inode = dreq->inode; const struct nfs_inode *nfsi = NFS_I(inode); const struct nfs_fh *fh = &nfsi->fh; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = dreq->io_start; __entry->count = dreq->count; __entry->error = dreq->error; __entry->flags = dreq->flags; ), TP_printk( "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zd flags=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count, nfs_show_direct_req_flags(__entry->flags) ) ); #define DEFINE_NFS_DIRECT_REQ_EVENT(name) \ DEFINE_EVENT(nfs_direct_req_class, name, \ TP_PROTO( \ const struct nfs_direct_req *dreq \ ), \ TP_ARGS(dreq)) DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io); TRACE_EVENT(nfs_fh_to_dentry, TP_PROTO( const struct super_block *sb, const struct nfs_fh *fh, u64 fileid, int error ), TP_ARGS(sb, fh, fileid, error), TP_STRUCT__entry( __field(int, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) ), TP_fast_assign( __entry->error = error; __entry->dev = sb->s_dev; __entry->fileid = fileid; __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle ) ); TRACE_EVENT(nfs_mount_assign, TP_PROTO( const char *option, const char *value ), TP_ARGS(option, value), TP_STRUCT__entry( __string(option, option) __string(value, value) ), TP_fast_assign( __assign_str(option, option); __assign_str(value, value); ), TP_printk("option %s=%s", __get_str(option), __get_str(value) ) ); TRACE_EVENT(nfs_mount_option, TP_PROTO( const struct fs_parameter *param ), TP_ARGS(param), TP_STRUCT__entry( __string(option, param->key) ), TP_fast_assign( __assign_str(option, param->key); ), TP_printk("option %s", __get_str(option)) ); TRACE_EVENT(nfs_mount_path, TP_PROTO( const char *path ), TP_ARGS(path), TP_STRUCT__entry( __string(path, path) ), TP_fast_assign( __assign_str(path, path); ), TP_printk("path='%s'", __get_str(path)) ); DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, int error ), TP_ARGS(xdr, error), TP_STRUCT__entry( __field(unsigned int, task_id) __field(unsigned int, client_id) __field(u32, xid) __field(int, version) __field(unsigned long, error) __string(program, xdr->rqst->rq_task->tk_client->cl_program->name) __string(procedure, xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) ), TP_fast_assign( const struct rpc_rqst *rqstp = xdr->rqst; const struct rpc_task *task = rqstp->rq_task; __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; __entry->error = error; __assign_str(program, task->tk_client->cl_program->name); __assign_str(procedure, task->tk_msg.rpc_proc->p_name); ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, show_nfs_status(__entry->error) ) ); #define DEFINE_NFS_XDR_EVENT(name) \ DEFINE_EVENT(nfs_xdr_event, name, \ TP_PROTO( \ const struct xdr_stream *xdr, \ int error \ ), \ TP_ARGS(xdr, error)) DEFINE_NFS_XDR_EVENT(nfs_xdr_status); DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle); #endif /* _TRACE_NFS_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE nfstrace /* This part must be outside protection */ #include <trace/define_trace.h> |
1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ /* * aoenet.c * Ethernet portion of AoE driver */ #include <linux/gfp.h> #include <linux/hdreg.h> #include <linux/blkdev.h> #include <linux/netdevice.h> #include <linux/moduleparam.h> #include <net/net_namespace.h> #include <asm/unaligned.h> #include "aoe.h" #define NECODES 5 static char *aoe_errlist[] = { "no such error", "unrecognized command code", "bad argument parameter", "device unavailable", "config string present", "unsupported version" }; enum { IFLISTSZ = 1024, }; static char aoe_iflist[IFLISTSZ]; module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]"); static wait_queue_head_t txwq; static struct ktstate kts; #ifndef MODULE static int __init aoe_iflist_setup(char *str) { strscpy(aoe_iflist, str, IFLISTSZ); return 1; } __setup("aoe_iflist=", aoe_iflist_setup); #endif static spinlock_t txlock; static struct sk_buff_head skbtxq; /* enters with txlock held */ static int tx(int id) __must_hold(&txlock) { struct sk_buff *skb; struct net_device *ifp; while ((skb = skb_dequeue(&skbtxq))) { spin_unlock_irq(&txlock); ifp = skb->dev; if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit()) pr_warn("aoe: packet could not be sent on %s. %s\n", ifp ? ifp->name : "netif", "consider increasing tx_queue_len"); spin_lock_irq(&txlock); } return 0; } int is_aoe_netif(struct net_device *ifp) { register char *p, *q; register int len; if (aoe_iflist[0] == '\0') return 1; p = aoe_iflist + strspn(aoe_iflist, WHITESPACE); for (; *p; p = q + strspn(q, WHITESPACE)) { q = p + strcspn(p, WHITESPACE); if (q != p) len = q - p; else len = strlen(p); /* last token in aoe_iflist */ if (strlen(ifp->name) == len && !strncmp(ifp->name, p, len)) return 1; if (q == p) break; } return 0; } int set_aoe_iflist(const char __user *user_str, size_t size) { if (size >= IFLISTSZ) return -EINVAL; if (copy_from_user(aoe_iflist, user_str, size)) { printk(KERN_INFO "aoe: copy from user failed\n"); return -EFAULT; } aoe_iflist[size] = 0x00; return 0; } void aoenet_xmit(struct sk_buff_head *queue) { struct sk_buff *skb, *tmp; ulong flags; skb_queue_walk_safe(queue, skb, tmp) { __skb_unlink(skb, queue); spin_lock_irqsave(&txlock, flags); skb_queue_tail(&skbtxq, skb); spin_unlock_irqrestore(&txlock, flags); wake_up(&txwq); } } /* * (1) len doesn't include the header by default. I want this. */ static int aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) { struct aoe_hdr *h; struct aoe_atahdr *ah; u32 n; int sn; if (dev_net(ifp) != &init_net) goto exit; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) return 0; if (!is_aoe_netif(ifp)) goto exit; skb_push(skb, ETH_HLEN); /* (1) */ sn = sizeof(*h) + sizeof(*ah); if (skb->len >= sn) { sn -= skb_headlen(skb); if (sn > 0 && !__pskb_pull_tail(skb, sn)) goto exit; } h = (struct aoe_hdr *) skb->data; n = get_unaligned_be32(&h->tag); if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) goto exit; if (h->verfl & AOEFL_ERR) { n = h->err; if (n > NECODES) n = 0; if (net_ratelimit()) printk(KERN_ERR "%s%d.%d@%s; ecode=%d '%s'\n", "aoe: error packet from ", get_unaligned_be16(&h->major), h->minor, skb->dev->name, h->err, aoe_errlist[n]); goto exit; } switch (h->cmd) { case AOECMD_ATA: /* ata_rsp may keep skb for later processing or give it back */ skb = aoecmd_ata_rsp(skb); break; case AOECMD_CFG: aoecmd_cfg_rsp(skb); break; default: if (h->cmd >= AOECMD_VEND_MIN) break; /* don't complain about vendor commands */ pr_info("aoe: unknown AoE command type 0x%02x\n", h->cmd); break; } if (!skb) return 0; exit: dev_kfree_skb(skb); return 0; } static struct packet_type aoe_pt __read_mostly = { .type = __constant_htons(ETH_P_AOE), .func = aoenet_rcv, }; int __init aoenet_init(void) { skb_queue_head_init(&skbtxq); init_waitqueue_head(&txwq); spin_lock_init(&txlock); kts.lock = &txlock; kts.fn = tx; kts.waitq = &txwq; kts.id = 0; snprintf(kts.name, sizeof(kts.name), "aoe_tx%d", kts.id); if (aoe_ktstart(&kts)) return -EAGAIN; dev_add_pack(&aoe_pt); return 0; } void aoenet_exit(void) { aoe_ktstop(&kts); skb_queue_purge(&skbtxq); dev_remove_pack(&aoe_pt); } |
290 290 298 63 295 58 16 16 109 62 81 43 16 18 95 101 67 105 9 12 224 224 109 87 109 112 112 4 264 265 1 3 66 75 51 75 75 36 62 76 77 1 7 414 262 380 167 322 362 78 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | // SPDX-License-Identifier: GPL-2.0-only /* * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of CUBIC TCP in * Sangtae Ha, Injong Rhee and Lisong Xu, * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" * in ACM SIGOPS Operating System Review, July 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf * * CUBIC integrates a new slow start algorithm, called HyStart. * The details of HyStart are presented in * Sangtae Ha and Injong Rhee, * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf * * All testing results are available from: * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing * * Unless CUBIC is enabled and congestion window is large * this behaves the same as the original Reno. */ #include <linux/mm.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ /* Two methods of hybrid slow start */ #define HYSTART_ACK_TRAIN 0x1 #define HYSTART_DELAY 0x2 /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 #define HYSTART_DELAY_MIN (4000U) /* 4 ms */ #define HYSTART_DELAY_MAX (16000U) /* 16 ms */ #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) static int fast_convergence __read_mostly = 1; static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; static int hystart_ack_delta_us __read_mostly = 2000; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; static u64 cube_factor __read_mostly; /* Note parameters that are used for precomputing scale factors are read-only */ module_param(fast_convergence, int, 0644); MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "beta for multiplicative increase"); module_param(initial_ssthresh, int, 0644); MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(bic_scale, int, 0444); MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); module_param(hystart, int, 0644); MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); module_param(hystart_detect, int, 0644); MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); module_param(hystart_ack_delta_us, int, 0644); MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)"); /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ u32 delay_min; /* min delay (usec) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; static inline void bictcp_reset(struct bictcp *ca) { memset(ca, 0, offsetof(struct bictcp, unused)); ca->found = 0; } static inline u32 bictcp_clock_us(const struct sock *sk) { return tcp_sk(sk)->tcp_mstamp; } static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->round_start = ca->last_ack = bictcp_clock_us(sk); ca->end_seq = tp->snd_nxt; ca->curr_rtt = ~0U; ca->sample_cnt = 0; } __bpf_kfunc static void cubictcp_init(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); if (hystart) bictcp_hystart_reset(sk); if (!hystart && initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } __bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. * Shift epoch_start to keep cwnd growth to cubic curve. */ if (ca->epoch_start && delta > 0) { ca->epoch_start += delta; if (after(ca->epoch_start, now)) ca->epoch_start = now; } return; } } /* calculate the cubic root of x using a table lookup followed by one * Newton-Raphson iteration. * Avg err ~= 0.195% */ static u32 cubic_root(u64 a) { u32 x, b, shift; /* * cbrt(x) MSB values for x MSB values in [0..63]. * Precomputed then refined by hand - Willy Tarreau * * For x in [0..63], * v = cbrt(x << 18) - 1 * cbrt(x) = (v[x] + 10) >> 6 */ static const u8 v[] = { /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, }; b = fls64(a); if (b < 7) { /* a in [0..63] */ return ((u32)v[(u32)a] + 35) >> 6; } b = ((b * 84) >> 8) - 1; shift = (a >> (b * 3)); x = ((u32)(((u32)v[shift] + 10) << b)) >> 6; /* * Newton-Raphson iteration * 2 * x = ( 2 * x + a / x ) / 3 * k+1 k k */ x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1))); x = ((x * 341) >> 10); return x; } /* * Compute congestion window to use. */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. */ if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { ca->bic_K = 0; ca->bic_origin_point = cwnd; } else { /* Compute new K based on * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */ ca->bic_K = cubic_root(cube_factor * (ca->last_max_cwnd - cwnd)); ca->bic_origin_point = ca->last_max_cwnd; } } /* cubic function - calc*/ /* calculate c * time^3 / rtt, * while considering overflow in calculation of time^3 * (so time^3 is done by using 64 bit) * and without the support of division of 64bit numbers * (so all divisions are done by using 32 bit) * also NOTE the unit of those veriables * time = (t - K) / 2^bictcp_HZ * c = bic_scale >> 10 * rtt = (srtt >> 3) / HZ * !!! The following code does not have overflow problems, * if the cwnd < 1 million packets !!! */ t = (s32)(tcp_jiffies32 - ca->epoch_start); t += usecs_to_jiffies(ca->delay_min); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; else offs = t - ca->bic_K; /* c/rtt * (t-K)^3 */ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); if (t < ca->bic_K) /* below origin*/ bic_target = ca->bic_origin_point - delta; else /* above origin*/ bic_target = ca->bic_origin_point + delta; /* cubic function - calc bictcp_cnt*/ if (bic_target > cwnd) { ca->cnt = cwnd / (bic_target - cwnd); } else { ca->cnt = 100 * cwnd; /* very small increment*/ } /* * The initial growth of cubic function may be too conservative * when the available bandwidth is still unknown. */ if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; delta = (cwnd * scale) >> 3; while (ca->ack_cnt > delta) { /* update tcp cwnd */ ca->ack_cnt -= delta; ca->tcp_cwnd++; } if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ delta = ca->tcp_cwnd - cwnd; max_cnt = cwnd / delta; if (ca->cnt > max_cnt) ca->cnt = max_cnt; } } /* The maximum rate of cwnd increase CUBIC allows is 1 packet per * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. */ ca->cnt = max(ca->cnt, 2U); } __bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } bictcp_update(ca, tcp_snd_cwnd(tp), acked); tcp_cong_avoid_ai(tp, ca->cnt, acked); } __bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else ca->last_max_cwnd = tcp_snd_cwnd(tp); return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } __bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); bictcp_hystart_reset(sk); } } /* Account for TSO/GRO delays. * Otherwise short RTT flows could get too small ssthresh, since during * slow start we begin with small TSO packets and ca->delay_min would * not account for long aggregation delay when TSO packets get bigger. * Ideally even with a very small RTT we would like to have at least one * TSO packet being sent and received by GRO, and another one in qdisc layer. * We apply another 100% factor because @rate is doubled at this point. * We cap the cushion to 1ms. */ static u32 hystart_ack_delay(const struct sock *sk) { unsigned long rate; rate = READ_ONCE(sk->sk_pacing_rate); if (!rate) return 0; return min_t(u64, USEC_PER_MSEC, div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate)); } static void hystart_update(struct sock *sk, u32 delay) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 threshold; if (after(tp->snd_una, ca->end_seq)) bictcp_hystart_reset(sk); if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock_us(sk); /* first detection parameter - ack-train detection */ if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) { ca->last_ack = now; threshold = ca->delay_min + hystart_ack_delay(sk); /* Hystart ack train triggers if we get ack past * ca->delay_min/2. * Pacing might have delayed packets up to RTT/2 * during slow start. */ if (sk->sk_pacing_status == SK_PACING_NONE) threshold >>= 1; if ((s32)(now - ca->round_start) > threshold) { ca->found = 1; pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n", now - ca->round_start, threshold, ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp)); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ if (ca->curr_rtt > delay) ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found = 1; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } } __bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; /* Discard delay samples right after fast recovery */ if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = sample->rtt_us; if (delay == 0) delay = 1; /* first time call or link delay decreases */ if (ca->delay_min == 0 || ca->delay_min > delay) ca->delay_min = delay; /* hystart triggers when cwnd is larger than some threshold */ if (!ca->found && tcp_in_slow_start(tp) && hystart && tcp_snd_cwnd(tp) >= hystart_low_window) hystart_update(sk, delay); } static struct tcp_congestion_ops cubictcp __read_mostly = { .init = cubictcp_init, .ssthresh = cubictcp_recalc_ssthresh, .cong_avoid = cubictcp_cong_avoid, .set_state = cubictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = cubictcp_cwnd_event, .pkts_acked = cubictcp_acked, .owner = THIS_MODULE, .name = "cubic", }; BTF_SET8_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) BTF_ID_FLAGS(func, cubictcp_state) BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif BTF_SET8_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms */ beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 / (BICTCP_BETA_SCALE - beta); cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 * so K = cubic_root( (wmax-cwnd)*rtt/c ) * the unit of K is bictcp_HZ=2^10, not HZ * * c = bic_scale >> 10 * rtt = 100ms * * the following code has been designed and tested for * cwnd < 1 million packets * RTT < 100 seconds * HZ < 1,000,00 (corresponding to 10 nano-second) */ /* 1/c * 2^2*bictcp_HZ * srtt */ cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); if (ret < 0) return ret; return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { tcp_unregister_congestion_control(&cubictcp); } module_init(cubictcp_register); module_exit(cubictcp_unregister); MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CUBIC TCP"); MODULE_VERSION("2.3"); |
4 4 5 7 2 5 7 1 71 71 3 3 3 3 3 3 3 6 6 3 3 3 3 3 3 3 6 6 6 6 6 6 6 6 10 10 10 10 7 3 3 7 7 4 3 7 7 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 66 66 66 65 1 1 1 1 638 627 6 5 174 173 2053 2054 366 366 3 6 6 6 6 1 6 182 181 183 182 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Handle firewalling * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> * Bart De Schuymer <bdschuym@pandora.be> * * Lennert dedicates this file to Kerstin Wurdinger. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> #include <linux/netfilter_bridge.h> #include <uapi/linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> #include <linux/rculist.h> #include <linux/inetdevice.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/route.h> #include <net/netfilter/br_netfilter.h> #include <net/netns/generic.h> #include <linux/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack_core.h> #endif static unsigned int brnf_net_id __read_mostly; struct brnf_net { bool enabled; #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_hdr; #endif /* default value is 1 */ int call_iptables; int call_ip6tables; int call_arptables; /* default value is 0 */ int filter_vlan_tagged; int filter_pppoe_tagged; int pass_vlan_indev; }; #define IS_IP(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; else return 0; } static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; } static inline bool is_vlan_ipv6(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_IPV6) && brnet->filter_vlan_tagged; } static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; } static inline __be16 pppoe_proto(const struct sk_buff *skb) { return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + sizeof(struct pppoe_hdr))); } static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return skb->protocol == htons(ETH_P_PPP_SES) && pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; } static inline bool is_pppoe_ipv6(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return skb->protocol == htons(ETH_P_PPP_SES) && pppoe_proto(skb) == htons(PPP_IPV6) && brnet->filter_pppoe_tagged; } /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; u8 size; u16 vlan_tci; __be16 vlan_proto; }; static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); static void nf_bridge_info_free(struct sk_buff *skb) { skb_ext_del(skb, SKB_EXT_BRIDGE_NF); } static inline struct net_device *bridge_parent(const struct net_device *dev) { struct net_bridge_port *port; port = br_port_get_rcu(dev); return port ? port->br->dev : NULL; } static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); } unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) { switch (skb->protocol) { case __cpu_to_be16(ETH_P_8021Q): return VLAN_HLEN; case __cpu_to_be16(ETH_P_PPP_SES): return PPPOE_SES_HLEN; default: return 0; } } static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); skb_pull(skb, len); skb->network_header += len; } static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); skb_pull_rcsum(skb, len); skb->network_header += len; } /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format */ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) { const struct iphdr *iph; u32 len; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; iph = ip_hdr(skb); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto csum_error; len = skb_ip_totlen(skb); if (skb->len < len) { __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); /* We should really parse IP options here but until * somebody who actually uses IP options complains to * us we'll just silently ignore the options because * we're lazy! */ return 0; csum_error: __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } void nf_bridge_update_protocol(struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); switch (nf_bridge->orig_proto) { case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); break; case BRNF_PROTO_PPPOE: skb->protocol = htons(ETH_P_PPP_SES); break; case BRNF_PROTO_UNCHANGED: break; } } /* Obtain the correct destination MAC address, while preserving the original * source MAC address. If we already know this address, we just copy it. If we * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && READ_ONCE(neigh->hh.hh_len)) { struct net_device *br_indev; br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { neigh_release(neigh); goto free_skb; } neigh_hh_bridge(&neigh->hh, skb); skb->dev = br_indev; ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->bridged_dnat = 1; /* FIXME Need to refragment */ ret = READ_ONCE(neigh->output)(neigh, skb); } neigh_release(neigh); return ret; } free_skb: kfree_skb(skb); return 0; } static inline bool br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, const struct nf_bridge_info *nf_bridge) { return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * This is also true when SNAT takes place (for the reply direction). * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge * port group as it was received on. We can still bridge * the packet. * 2. The packet was DNAT'ed to a different device, either * a non-bridged device or another bridge port group. * The packet will need to be routed. * * The correct way of distinguishing between these two cases is to * call ip_route_input() and to look at skb->dst->dev, which is * changed to the destination device if ip_route_input() succeeds. * * Let's first consider the case that ip_route_input() succeeds: * * If the output device equals the logical bridge device the packet * came in on, we can consider this bridging. The corresponding MAC * address will be obtained in br_nf_pre_routing_finish_bridge. * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will * later be passed up to the IP stack to be routed. For a redirected * packet, ip_route_input() will give back the localhost as output device, * which differs from the bridge device. * * Let's now consider the case that ip_route_input() fails: * * This can be because the destination address is martian, in which case * the packet will be dropped. * If IP forwarding is disabled, ip_route_input() will fail, while * ip_route_output_key() can return success. The source * address for ip_route_output_key() is set to zero, so ip_route_output_key() * thinks we're handling a locally generated packet and won't care * if IP forwarding is enabled. If the output device equals the logical bridge * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev, *br_indev; struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { kfree_skb(skb); return 0; } nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge->in_prerouting = 0; if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a * martian destination or due to the fact that * forwarding is disabled. For most martian packets, * ip_route_output_key() will fail. It won't fail for 2 types of * martian destinations: loopback destinations and destination * 0.0.0.0. In both cases the packet will be dropped because the * destination is the loopback device and not the bridge. */ if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; rt = ip_route_output(net, iph->daddr, 0, RT_TOS(iph->tos), 0); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ if (rt->dst.dev == dev) { skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); goto bridged_dnat; } ip_rt_put(rt); } free_skb: kfree_skb(skb); return 0; } else { if (skb_dst(skb)->dev == dev) { bridged_dnat: skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); skb->pkt_type = PACKET_HOST; } } else { rt = bridge_parent_rtable(br_indev); if (!rt) { kfree_skb(skb); return 0; } skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); } skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish); return 0; } static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev, const struct net *net) { struct net_device *vlan, *br; struct brnf_net *brnet = net_generic(net, brnf_net_id); br = bridge_parent(dev); if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } /* Some common code for IPv4/IPv6 */ struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } nf_bridge->in_prerouting = 1; nf_bridge->physinif = skb->dev->ifindex; skb->dev = brnf_get_logical_dev(skb, skb->dev, net); if (skb->protocol == htons(ETH_P_8021Q)) nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); return skb->dev; } /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. * Replicate the checks that IPv4 does on packet reception. * Set skb->dev to the bridge device (i.e. parent of the * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); struct brnf_net *brnet; if (unlikely(!pskb_may_pull(skb, len))) return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); p = br_port_get_rcu(state->in); if (p == NULL) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); br = p->br; brnet = net_generic(state->net, brnf_net_id); if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) { if (!brnet->call_ip6tables && !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) return NF_ACCEPT; if (!ipv6_mod_enabled()) { pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0); } nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) return NF_ACCEPT; if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && !is_pppoe_ip(skb, state->net)) return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); if (br_validate_ipv4(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); if (!nf_bridge_alloc(skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); if (!setup_pre_routing(skb, state->net)) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge = nf_bridge_info_get(skb); nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; } #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* conntracks' nf_confirm logic cannot handle cloned skbs referencing * the same nf_conn entry, which will happen for multicast (broadcast) * Frames on bridges. * * Example: * macvlan0 * br0 * ethX ethY * * ethX (or Y) receives multicast or broadcast packet containing * an IP packet, not yet in conntrack table. * * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. * -> skb->_nfct now references a unconfirmed entry * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge * interface. * 3. skb gets passed up the stack. * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb * and schedules a work queue to send them out on the lower devices. * * The clone skb->_nfct is not a copy, it is the same entry as the * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. * * The Macvlan broadcast worker and normal confirm path will race. * * This race will not happen if step 2 already confirmed a clone. In that * case later steps perform skb_clone() with skb->_nfct already confirmed (in * hash table). This works fine. * * But such confirmation won't happen when eb/ip/nftables rules dropped the * packets before they reached the nf_confirm step in postrouting. * * Work around this problem by explicit confirmation of the entry at * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed * entry. * */ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; ct = container_of(nfct, struct nf_conn, ct_general); if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; WARN_ON_ONCE(skb_shared(skb)); WARN_ON_ONCE(refcount_read(&nfct->use) != 1); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. */ ct_hook = rcu_dereference(nf_ct_hook); if (!ct_hook) { skb->_nfct = 0ul; nf_conntrack_put(nfct); return NF_ACCEPT; } nf_bridge_pull_encap_header(skb); ret = ct_hook->confirm(skb); switch (ret & NF_VERDICT_MASK) { case NF_STOLEN: return NF_STOLEN; default: nf_bridge_push_encap_header(skb); break; } ct = container_of(nfct, struct nf_conn, ct_general); WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); return ret; } #endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (skb->protocol == htons(ETH_P_IPV6)) nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; in = nf_bridge_get_physindev(skb, net); if (!in) { kfree_skb(skb); return 0; } if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { in = *((struct net_device **)(skb->cb)); } nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, br_forward_finish); return 0; } static unsigned int br_nf_forward_ip(struct sk_buff *skb, const struct nf_hook_state *state, u8 pf) { struct nf_bridge_info *nf_bridge; struct net_device *parent; nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return NF_ACCEPT; /* Need exclusive nf_bridge_info since we might have multiple * different physoutdevs. */ if (!nf_bridge_unshare(skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); parent = bridge_parent(state->out); if (!parent) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge_pull_encap_header(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } if (pf == NFPROTO_IPV4) { if (br_validate_ipv4(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; skb->protocol = htons(ETH_P_IP); } else if (pf == NFPROTO_IPV6) { if (br_validate_ipv6(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; skb->protocol = htons(ETH_P_IPV6); } else { WARN_ON_ONCE(1); return NF_DROP; } nf_bridge->physoutdev = skb->dev; NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in, state->net), parent, br_nf_forward_finish); return NF_STOLEN; } static unsigned int br_nf_forward_arp(struct sk_buff *skb, const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); struct brnf_net *brnet; p = br_port_get_rcu(state->out); if (p == NULL) return NF_ACCEPT; br = p->br; brnet = net_generic(state->net, brnf_net_id); if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) return NF_ACCEPT; if (is_vlan_arp(skb, state->net)) nf_bridge_pull_encap_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); if (arp_hdr(skb)->ar_pln != 4) { if (is_vlan_arp(skb, state->net)) nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = state->in; NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } /* This is the 'purely bridged' case. For IP, we pass the packet to * netfilter with indev and outdev set to the bridge device, * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ static unsigned int br_nf_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) return br_nf_forward_ip(skb, state, NFPROTO_IPV4); if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) return br_nf_forward_ip(skb, state, NFPROTO_IPV6); if (IS_ARP(skb) || is_vlan_arp(skb, state->net)) return br_nf_forward_arp(skb, state); return NF_ACCEPT; } static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; int err; data = this_cpu_ptr(&brnf_frag_data_storage); err = skb_cow_head(skb, data->size); if (err) { kfree_skb(skb); return 0; } if (data->vlan_proto) __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); __skb_push(skb, data->encap_size); nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } return ip_do_fragment(net, sk, skb, output); } static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; if (br_validate_ipv4(net, skb)) goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; } else { data->vlan_proto = 0; } data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); } if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; if (br_validate_ipv6(net, skb)) goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); if (v6ops) return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); kfree_skb(skb); return -EMSGSIZE; } nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); drop: kfree_skb(skb); return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in * on a bridge, but was delivered locally and is now being routed: * * POST_ROUTING was already invoked from the ip stack. */ if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) pf = NFPROTO_IPV4; else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) pf = NFPROTO_IPV6; else return NF_ACCEPT; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; } /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge) { if (nf_bridge->sabotage_in_done) return NF_ACCEPT; if (!nf_bridge->in_prerouting && !netif_is_l3_master(skb->dev) && !netif_is_l3_slave(skb->dev)) { nf_bridge->sabotage_in_done = 1; state->okfn(state->net, state->sk, skb); return NF_STOLEN; } } return NF_ACCEPT; } /* This is called when br_netfilter has called into iptables/netfilter, * and DNAT has taken place on a bridge-forwarded packet. * * neigh->output has created a new MAC header, with local br0 MAC * as saddr. * * This restores the original MAC saddr of the bridged packet * before invoking bridge forward logic to transmit the packet. */ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *br_indev; br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev)); if (!br_indev) { kfree_skb(skb); return; } skb_pull(skb, ETH_HLEN); nf_bridge->bridged_dnat = 0; BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); skb->dev = br_indev; nf_bridge->physoutdev = NULL; br_handle_frame_finish(dev_net(skb->dev), NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge && nf_bridge->bridged_dnat) { br_nf_pre_routing_finish_bridge_slow(skb); return 1; } return 0; } static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static const struct nf_hook_ops br_nf_ops[] = { { .hook = br_nf_pre_routing, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, #if IS_ENABLED(CONFIG_NF_CONNTRACK) { .hook = br_nf_local_in, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_LAST, }, #endif { .hook = br_nf_forward, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_post_routing, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_LAST, }, { .hook = ip_sabotage_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }, { .hook = ip_sabotage_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, }; static int brnf_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct brnf_net *brnet; struct net *net; int ret; if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) return NOTIFY_DONE; ASSERT_RTNL(); net = dev_net(dev); brnet = net_generic(net, brnf_net_id); if (brnet->enabled) return NOTIFY_OK; ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); if (ret) return NOTIFY_BAD; brnet->enabled = true; return NOTIFY_OK; } static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; /* recursively invokes nf_hook_slow (again), skipping already-called * hooks (< NF_BR_PRI_BRNF). * * Called with rcu read lock held. */ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { const struct nf_hook_entries *e; struct nf_hook_state state; struct nf_hook_ops **ops; unsigned int i; int ret; e = rcu_dereference(net->nf.hooks_bridge[hook]); if (!e) return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); for (i = 0; i < e->num_hook_entries; i++) { /* These hooks have already been called */ if (ops[i]->priority < NF_BR_PRI_BRNF) continue; /* These hooks have not been called yet, run them. */ if (ops[i]->priority > NF_BR_PRI_BRNF) break; /* take a closer look at NF_BR_PRI_BRNF. */ if (ops[i]->hook == br_nf_pre_routing) { /* This hook diverted the skb to this function, * hooks after this have not been run yet. */ i++; break; } } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, e, i); if (ret == 1) ret = okfn(net, sk, skb); return ret; } #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; return ret; } static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-pass-vlan-input-dev", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { } }; static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) { brnf->call_iptables = 1; brnf->call_ip6tables = 1; brnf->call_arptables = 1; brnf->filter_vlan_tagged = 0; brnf->filter_pppoe_tagged = 0; brnf->pass_vlan_indev = 0; } static int br_netfilter_sysctl_init_net(struct net *net) { struct ctl_table *table = brnf_table; struct brnf_net *brnet; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); if (!table) return -ENOMEM; } brnet = net_generic(net, brnf_net_id); table[0].data = &brnet->call_arptables; table[1].data = &brnet->call_iptables; table[2].data = &brnet->call_ip6tables; table[3].data = &brnet->filter_vlan_tagged; table[4].data = &brnet->filter_pppoe_tagged; table[5].data = &brnet->pass_vlan_indev; br_netfilter_sysctl_default(brnet); brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table, ARRAY_SIZE(brnf_table)); if (!brnet->ctl_hdr) { if (!net_eq(net, &init_net)) kfree(table); return -ENOMEM; } return 0; } static void br_netfilter_sysctl_exit_net(struct net *net, struct brnf_net *brnet) { struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; unregister_net_sysctl_table(brnet->ctl_hdr); if (!net_eq(net, &init_net)) kfree(table); } static int __net_init brnf_init_net(struct net *net) { return br_netfilter_sysctl_init_net(net); } #endif static void __net_exit brnf_exit_net(struct net *net) { struct brnf_net *brnet; brnet = net_generic(net, brnf_net_id); if (brnet->enabled) { nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); brnet->enabled = false; } #ifdef CONFIG_SYSCTL br_netfilter_sysctl_exit_net(net, brnet); #endif } static struct pernet_operations brnf_net_ops __read_mostly = { #ifdef CONFIG_SYSCTL .init = brnf_init_net, #endif .exit = brnf_exit_net, .id = &brnf_net_id, .size = sizeof(struct brnf_net), }; static int __init br_netfilter_init(void) { int ret; ret = register_pernet_subsys(&brnf_net_ops); if (ret < 0) return ret; ret = register_netdevice_notifier(&brnf_notifier); if (ret < 0) { unregister_pernet_subsys(&brnf_net_ops); return ret; } RCU_INIT_POINTER(nf_br_ops, &br_ops); printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; } static void __exit br_netfilter_fini(void) { RCU_INIT_POINTER(nf_br_ops, NULL); unregister_netdevice_notifier(&brnf_notifier); unregister_pernet_subsys(&brnf_net_ops); } module_init(br_netfilter_init); module_exit(br_netfilter_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); |
39 39 39 39 39 7 7 7 7 7 6 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 | // SPDX-License-Identifier: GPL-2.0-only /* * virtio transport for vsock * * Copyright (C) 2013-2015 Red Hat, Inc. * Author: Asias He <asias@redhat.com> * Stefan Hajnoczi <stefanha@redhat.com> * * Some of the code is take from Gerd Hoffmann <kraxel@redhat.com>'s * early virtio-vsock proof-of-concept bits. */ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/list.h> #include <linux/atomic.h> #include <linux/virtio.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> #include <linux/virtio_vsock.h> #include <net/sock.h> #include <linux/mutex.h> #include <net/af_vsock.h> static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct virtio_device *vdev; struct virtqueue *vqs[VSOCK_VQ_MAX]; /* Virtqueue processing is deferred to a workqueue */ struct work_struct tx_work; struct work_struct rx_work; struct work_struct event_work; /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] * must be accessed with tx_lock held. */ struct mutex tx_lock; bool tx_run; struct work_struct send_pkt_work; struct sk_buff_head send_pkt_queue; atomic_t queued_replies; /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] * must be accessed with rx_lock held. */ struct mutex rx_lock; bool rx_run; int rx_buf_nr; int rx_buf_max_nr; /* The following fields are protected by event_lock. * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held. */ struct mutex event_lock; bool event_run; struct virtio_vsock_event event_list[8]; u32 guest_cid; bool seqpacket_allow; /* These fields are used only in tx path in function * 'virtio_transport_send_pkt_work()', so to save * stack space in it, place both of them here. Each * pointer from 'out_sgs' points to the corresponding * element in 'out_bufs' - this is initialized in * 'virtio_vsock_probe()'. Both fields are protected * by 'tx_lock'. +1 is needed for packet header. */ struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1]; struct scatterlist out_bufs[MAX_SKB_FRAGS + 1]; }; static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock; u32 ret; rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); if (!vsock) { ret = VMADDR_CID_ANY; goto out_rcu; } ret = vsock->guest_cid; out_rcu: rcu_read_unlock(); return ret; } static void virtio_transport_send_pkt_work(struct work_struct *work) { struct virtio_vsock *vsock = container_of(work, struct virtio_vsock, send_pkt_work); struct virtqueue *vq; bool added = false; bool restart_rx = false; mutex_lock(&vsock->tx_lock); if (!vsock->tx_run) goto out; vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { int ret, in_sg = 0, out_sg = 0; struct scatterlist **sgs; struct sk_buff *skb; bool reply; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); if (!skb) break; virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); sgs = vsock->out_sgs; sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); out_sg++; if (!skb_is_nonlinear(skb)) { if (skb->len > 0) { sg_init_one(sgs[out_sg], skb->data, skb->len); out_sg++; } } else { struct skb_shared_info *si; int i; /* If skb is nonlinear, then its buffer must contain * only header and nothing more. Data is stored in * the fragged part. */ WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); si = skb_shinfo(skb); for (i = 0; i < si->nr_frags; i++) { skb_frag_t *skb_frag = &si->frags[i]; void *va; /* We will use 'page_to_virt()' for the userspace page * here, because virtio or dma-mapping layers will call * 'virt_to_phys()' later to fill the buffer descriptor. * We don't touch memory at "virtual" address of this page. */ va = page_to_virt(skb_frag_page(skb_frag)); sg_init_one(sgs[out_sg], va + skb_frag_off(skb_frag), skb_frag_size(skb_frag)); out_sg++; } } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); /* Usually this means that there is no more space available in * the vq */ if (ret < 0) { virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); break; } if (reply) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int val; val = atomic_dec_return(&vsock->queued_replies); /* Do we now have resources to resume rx processing? */ if (val + 1 == virtqueue_get_vring_size(rx_vq)) restart_rx = true; } added = true; } if (added) virtqueue_kick(vq); out: mutex_unlock(&vsock->tx_lock); if (restart_rx) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } static int virtio_transport_send_pkt(struct sk_buff *skb) { struct virtio_vsock_hdr *hdr; struct virtio_vsock *vsock; int len = skb->len; hdr = virtio_vsock_hdr(skb); rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); if (!vsock) { kfree_skb(skb); len = -ENODEV; goto out_rcu; } if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) { kfree_skb(skb); len = -ENODEV; goto out_rcu; } if (virtio_vsock_skb_reply(skb)) atomic_inc(&vsock->queued_replies); virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); out_rcu: rcu_read_unlock(); return len; } static int virtio_transport_cancel_pkt(struct vsock_sock *vsk) { struct virtio_vsock *vsock; int cnt = 0, ret; rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); if (!vsock) { ret = -ENODEV; goto out_rcu; } cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue); if (cnt) { struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; int new_cnt; new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) && new_cnt < virtqueue_get_vring_size(rx_vq)) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } ret = 0; out_rcu: rcu_read_unlock(); return ret; } static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; struct scatterlist pkt, *p; struct virtqueue *vq; struct sk_buff *skb; int ret; vq = vsock->vqs[VSOCK_VQ_RX]; do { skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL); if (!skb) break; memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM); sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len); p = &pkt; ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL); if (ret < 0) { kfree_skb(skb); break; } vsock->rx_buf_nr++; } while (vq->num_free); if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) vsock->rx_buf_max_nr = vsock->rx_buf_nr; virtqueue_kick(vq); } static void virtio_transport_tx_work(struct work_struct *work) { struct virtio_vsock *vsock = container_of(work, struct virtio_vsock, tx_work); struct virtqueue *vq; bool added = false; vq = vsock->vqs[VSOCK_VQ_TX]; mutex_lock(&vsock->tx_lock); if (!vsock->tx_run) goto out; do { struct sk_buff *skb; unsigned int len; virtqueue_disable_cb(vq); while ((skb = virtqueue_get_buf(vq, &len)) != NULL) { consume_skb(skb); added = true; } } while (!virtqueue_enable_cb(vq)); out: mutex_unlock(&vsock->tx_lock); if (added) queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); } /* Is there space left for replies to rx packets? */ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) { struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; int val; smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ val = atomic_read(&vsock->queued_replies); return val < virtqueue_get_vring_size(vq); } /* event_lock must be held */ static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, struct virtio_vsock_event *event) { struct scatterlist sg; struct virtqueue *vq; vq = vsock->vqs[VSOCK_VQ_EVENT]; sg_init_one(&sg, event, sizeof(*event)); return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); } /* event_lock must be held */ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) { size_t i; for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { struct virtio_vsock_event *event = &vsock->event_list[i]; virtio_vsock_event_fill_one(vsock, event); } virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); } static void virtio_vsock_reset_sock(struct sock *sk) { /* vmci_transport.c doesn't take sk_lock here either. At least we're * under vsock_table_lock so the sock cannot disappear while we're * executing. */ sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk_error_report(sk); } static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) { struct virtio_device *vdev = vsock->vdev; __le64 guest_cid; vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), &guest_cid, sizeof(guest_cid)); vsock->guest_cid = le64_to_cpu(guest_cid); } /* event_lock must be held */ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, struct virtio_vsock_event *event) { switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); vsock_for_each_connected_socket(&virtio_transport.transport, virtio_vsock_reset_sock); break; } } static void virtio_transport_event_work(struct work_struct *work) { struct virtio_vsock *vsock = container_of(work, struct virtio_vsock, event_work); struct virtqueue *vq; vq = vsock->vqs[VSOCK_VQ_EVENT]; mutex_lock(&vsock->event_lock); if (!vsock->event_run) goto out; do { struct virtio_vsock_event *event; unsigned int len; virtqueue_disable_cb(vq); while ((event = virtqueue_get_buf(vq, &len)) != NULL) { if (len == sizeof(*event)) virtio_vsock_event_handle(vsock, event); virtio_vsock_event_fill_one(vsock, event); } } while (!virtqueue_enable_cb(vq)); virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); out: mutex_unlock(&vsock->event_lock); } static void virtio_vsock_event_done(struct virtqueue *vq) { struct virtio_vsock *vsock = vq->vdev->priv; if (!vsock) return; queue_work(virtio_vsock_workqueue, &vsock->event_work); } static void virtio_vsock_tx_done(struct virtqueue *vq) { struct virtio_vsock *vsock = vq->vdev->priv; if (!vsock) return; queue_work(virtio_vsock_workqueue, &vsock->tx_work); } static void virtio_vsock_rx_done(struct virtqueue *vq) { struct virtio_vsock *vsock = vq->vdev->priv; if (!vsock) return; queue_work(virtio_vsock_workqueue, &vsock->rx_work); } static bool virtio_transport_can_msgzerocopy(int bufs_num) { struct virtio_vsock *vsock; bool res = false; rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); if (vsock) { struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; /* Check that tx queue is large enough to keep whole * data to send. This is needed, because when there is * not enough free space in the queue, current skb to * send will be reinserted to the head of tx list of * the socket to retry transmission later, so if skb * is bigger than whole queue, it will be reinserted * again and again, thus blocking other skbs to be sent. * Each page of the user provided buffer will be added * as a single buffer to the tx virtqueue, so compare * number of pages against maximum capacity of the queue. */ if (bufs_num <= vq->num_max) res = true; } rcu_read_unlock(); return res; } static bool virtio_transport_msgzerocopy_allow(void) { return true; } static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = virtio_transport_get_local_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, .cancel_pkt = virtio_transport_cancel_pkt, .dgram_bind = virtio_transport_dgram_bind, .dgram_dequeue = virtio_transport_dgram_dequeue, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_allow = virtio_transport_dgram_allow, .stream_dequeue = virtio_transport_stream_dequeue, .stream_enqueue = virtio_transport_stream_enqueue, .stream_has_data = virtio_transport_stream_has_data, .stream_has_space = virtio_transport_stream_has_space, .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, .seqpacket_dequeue = virtio_transport_seqpacket_dequeue, .seqpacket_enqueue = virtio_transport_seqpacket_enqueue, .seqpacket_allow = virtio_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, .msgzerocopy_allow = virtio_transport_msgzerocopy_allow, .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, .notify_send_init = virtio_transport_notify_send_init, .notify_send_pre_block = virtio_transport_notify_send_pre_block, .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, .send_pkt = virtio_transport_send_pkt, .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) { struct virtio_vsock *vsock; bool seqpacket_allow; seqpacket_allow = false; rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); if (vsock) seqpacket_allow = vsock->seqpacket_allow; rcu_read_unlock(); return seqpacket_allow; } static void virtio_transport_rx_work(struct work_struct *work) { struct virtio_vsock *vsock = container_of(work, struct virtio_vsock, rx_work); struct virtqueue *vq; vq = vsock->vqs[VSOCK_VQ_RX]; mutex_lock(&vsock->rx_lock); if (!vsock->rx_run) goto out; do { virtqueue_disable_cb(vq); for (;;) { struct sk_buff *skb; unsigned int len; if (!virtio_transport_more_replies(vsock)) { /* Stop rx until the device processes already * pending replies. Leave rx virtqueue * callbacks disabled. */ goto out; } skb = virtqueue_get_buf(vq, &len); if (!skb) break; vsock->rx_buf_nr--; /* Drop short/long packets */ if (unlikely(len < sizeof(struct virtio_vsock_hdr) || len > virtio_vsock_skb_len(skb))) { kfree_skb(skb); continue; } virtio_vsock_skb_rx_put(skb); virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&virtio_transport, skb); } } while (!virtqueue_enable_cb(vq)); out: if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) virtio_vsock_rx_fill(vsock); mutex_unlock(&vsock->rx_lock); } static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) { struct virtio_device *vdev = vsock->vdev; static const char * const names[] = { "rx", "tx", "event", }; vq_callback_t *callbacks[] = { virtio_vsock_rx_done, virtio_vsock_tx_done, virtio_vsock_event_done, }; int ret; ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names, NULL); if (ret < 0) return ret; virtio_vsock_update_guest_cid(vsock); virtio_device_ready(vdev); return 0; } static void virtio_vsock_vqs_start(struct virtio_vsock *vsock) { mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); mutex_lock(&vsock->rx_lock); virtio_vsock_rx_fill(vsock); vsock->rx_run = true; mutex_unlock(&vsock->rx_lock); mutex_lock(&vsock->event_lock); virtio_vsock_event_fill(vsock); vsock->event_run = true; mutex_unlock(&vsock->event_lock); /* virtio_transport_send_pkt() can queue packets once * the_virtio_vsock is set, but they won't be processed until * vsock->tx_run is set to true. We queue vsock->send_pkt_work * when initialization finishes to send those packets queued * earlier. * We don't need to queue the other workers (rx, event) because * as long as we don't fill the queues with empty buffers, the * host can't send us any notification. */ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); } static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) { struct virtio_device *vdev = vsock->vdev; struct sk_buff *skb; /* Reset all connected sockets when the VQs disappear */ vsock_for_each_connected_socket(&virtio_transport.transport, virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, * so we can safely call virtio_reset_device(). */ mutex_lock(&vsock->rx_lock); vsock->rx_run = false; mutex_unlock(&vsock->rx_lock); mutex_lock(&vsock->tx_lock); vsock->tx_run = false; mutex_unlock(&vsock->tx_lock); mutex_lock(&vsock->event_lock); vsock->event_run = false; mutex_unlock(&vsock->event_lock); /* Flush all device writes and interrupts, device will not use any * more buffers. */ virtio_reset_device(vdev); mutex_lock(&vsock->rx_lock); while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) kfree_skb(skb); mutex_unlock(&vsock->rx_lock); mutex_lock(&vsock->tx_lock); while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) kfree_skb(skb); mutex_unlock(&vsock->tx_lock); virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue); /* Delete virtqueues and flush outstanding callbacks if any */ vdev->config->del_vqs(vdev); } static int virtio_vsock_probe(struct virtio_device *vdev) { struct virtio_vsock *vsock = NULL; int ret; int i; ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); if (ret) return ret; /* Only one virtio-vsock device per guest is supported */ if (rcu_dereference_protected(the_virtio_vsock, lockdep_is_held(&the_virtio_vsock_mutex))) { ret = -EBUSY; goto out; } vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); if (!vsock) { ret = -ENOMEM; goto out; } vsock->vdev = vdev; vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); mutex_init(&vsock->event_lock); skb_queue_head_init(&vsock->send_pkt_queue); INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); INIT_WORK(&vsock->event_work, virtio_transport_event_work); INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) vsock->seqpacket_allow = true; vdev->priv = vsock; ret = virtio_vsock_vqs_init(vsock); if (ret < 0) goto out; for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++) vsock->out_sgs[i] = &vsock->out_bufs[i]; rcu_assign_pointer(the_virtio_vsock, vsock); virtio_vsock_vqs_start(vsock); mutex_unlock(&the_virtio_vsock_mutex); return 0; out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); return ret; } static void virtio_vsock_remove(struct virtio_device *vdev) { struct virtio_vsock *vsock = vdev->priv; mutex_lock(&the_virtio_vsock_mutex); vdev->priv = NULL; rcu_assign_pointer(the_virtio_vsock, NULL); synchronize_rcu(); virtio_vsock_vqs_del(vsock); /* Other works can be queued before 'config->del_vqs()', so we flush * all works before to free the vsock object to avoid use after free. */ flush_work(&vsock->rx_work); flush_work(&vsock->tx_work); flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); mutex_unlock(&the_virtio_vsock_mutex); kfree(vsock); } #ifdef CONFIG_PM_SLEEP static int virtio_vsock_freeze(struct virtio_device *vdev) { struct virtio_vsock *vsock = vdev->priv; mutex_lock(&the_virtio_vsock_mutex); rcu_assign_pointer(the_virtio_vsock, NULL); synchronize_rcu(); virtio_vsock_vqs_del(vsock); mutex_unlock(&the_virtio_vsock_mutex); return 0; } static int virtio_vsock_restore(struct virtio_device *vdev) { struct virtio_vsock *vsock = vdev->priv; int ret; mutex_lock(&the_virtio_vsock_mutex); /* Only one virtio-vsock device per guest is supported */ if (rcu_dereference_protected(the_virtio_vsock, lockdep_is_held(&the_virtio_vsock_mutex))) { ret = -EBUSY; goto out; } ret = virtio_vsock_vqs_init(vsock); if (ret < 0) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); virtio_vsock_vqs_start(vsock); out: mutex_unlock(&the_virtio_vsock_mutex); return ret; } #endif /* CONFIG_PM_SLEEP */ static struct virtio_device_id id_table[] = { { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, { 0 }, }; static unsigned int features[] = { VIRTIO_VSOCK_F_SEQPACKET }; static struct virtio_driver virtio_vsock_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, #ifdef CONFIG_PM_SLEEP .freeze = virtio_vsock_freeze, .restore = virtio_vsock_restore, #endif }; static int __init virtio_vsock_init(void) { int ret; virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; ret = vsock_core_register(&virtio_transport.transport, VSOCK_TRANSPORT_F_G2H); if (ret) goto out_wq; ret = register_virtio_driver(&virtio_vsock_driver); if (ret) goto out_vci; return 0; out_vci: vsock_core_unregister(&virtio_transport.transport); out_wq: destroy_workqueue(virtio_vsock_workqueue); return ret; } static void __exit virtio_vsock_exit(void) { unregister_virtio_driver(&virtio_vsock_driver); vsock_core_unregister(&virtio_transport.transport); destroy_workqueue(virtio_vsock_workqueue); } module_init(virtio_vsock_init); module_exit(virtio_vsock_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Asias He"); MODULE_DESCRIPTION("virtio transport for vsock"); MODULE_DEVICE_TABLE(virtio, id_table); |
10 10 15 5 10 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/joliet.c * * (C) 1996 Gordon Chaffee * * Joliet: Microsoft's Unicode extensions to iso9660 */ #include <linux/types.h> #include <linux/nls.h> #include "isofs.h" /* * Convert Unicode 16 to UTF-8 or ASCII. */ static int uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) { __be16 *ip, ch; unsigned char *op; ip = uni; op = ascii; while ((ch = get_unaligned(ip)) && len) { int llen; llen = nls->uni2char(be16_to_cpu(ch), op, NLS_MAX_CHARSET_SIZE); if (llen > 0) op += llen; else *op++ = '?'; ip++; len--; } *op = 0; return (op - ascii); } int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { struct nls_table *nls; unsigned char len = 0; nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; if (!nls) { len = utf16s_to_utf8s((const wchar_t *) de->name, de->name_len[0] >> 1, UTF16_BIG_ENDIAN, outname, PAGE_SIZE); } else { len = uni16_to_x8(outname, (__be16 *) de->name, de->name_len[0] >> 1, nls); } if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1')) len -= 2; /* * Windows doesn't like periods at the end of a name, * so neither do we */ while (len >= 2 && (outname[len-1] == '.')) len--; return len; } |
3 3 3 17 17 7 11 17 6 2 3 1 6 6 14 13 14 20 20 6 6 14 91 24 6 4 8 6 5 1 1 2 3 8 7 6 4 10 10 49 1 1 47 37 3 7 10 5 5 3 4 14 23 32 37 36 21 11 28 6 28 23 9 15 12 4 17 14 14 8 9 17 14 3 6 67 67 1 63 3 62 50 3 4 6 10 39 20 32 1 4 1 4 44 6 25 13 7 33 11 9 10 35 44 7 1 6 6 6 151 1 1 3 4 98 3 4 3 1 45 5 49 67 8 8 4 6 1 7 1 4 1 4 3 1 2054 2025 138 24 5 5 5 13 1 12 12 6 6 12 12 3 3 4 12 6 6 12 17 2 1 1 1 2 10 12 25 2 24 2 23 9 14 2 166 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <net/sock.h> #include <net/net_namespace.h> /* * To send multiple CAN frame content within TX_SETUP or to filter * CAN messages with multiplex index within RX_SETUP, the number of * different filters is limited to 256 due to the one byte index value. */ #define MAX_NFRAMES 256 /* limit timers to 400 days for sending/timeouts */ #define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) /* use of last_frames[index].flags */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ #define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */ /* get best masking value for can_rx_register() for a given single can_id */ #define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS("can-proto-2"); #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) /* * easy access to the first 64 bit of can(fd)_frame payload. cp->data is * 64 bit aligned so the offset has to be multiples of 8 which is ensured * by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler(). */ static inline u64 get_u64(const struct canfd_frame *cp, int offset) { return *(u64 *)(cp->data + offset); } struct bcm_op { struct list_head list; struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; u32 count; u32 nframes; u32 currframe; /* void pointers to arrays of struct can[fd]_frame */ void *frames; void *last_frames; struct canfd_frame sframe; struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; }; struct bcm_sock { struct sock sk; int bound; int ifindex; struct list_head notifier; struct list_head rx_ops; struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; char procname [32]; /* inode number in decimal with \0 */ }; static LIST_HEAD(bcm_notifier_list); static DEFINE_SPINLOCK(bcm_notifier_lock); static struct bcm_sock *bcm_busy_notifier; static inline struct bcm_sock *bcm_sk(const struct sock *sk) { return (struct bcm_sock *)sk; } static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) { return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } /* check limitations for timeval provided by user */ static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) { if ((msg_head->ival1.tv_sec < 0) || (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival1.tv_usec < 0) || (msg_head->ival1.tv_usec >= USEC_PER_SEC) || (msg_head->ival2.tv_sec < 0) || (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival2.tv_usec < 0) || (msg_head->ival2.tv_usec >= USEC_PER_SEC)) return true; return false; } #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) /* * procfs functions */ #if IS_ENABLED(CONFIG_PROC_FS) static char *bcm_proc_getifname(struct net *net, char *result, int ifindex) { struct net_device *dev; if (!ifindex) return "any"; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) strcpy(result, dev->name); else strcpy(result, "???"); rcu_read_unlock(); return result; } static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; struct net *net = m->private; struct sock *sk = (struct sock *)pde_data(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; seq_printf(m, ">>> socket %pK", sk->sk_socket); seq_printf(m, " / sk %pK", sk); seq_printf(m, " / bo %pK", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); list_for_each_entry(op, &bo->rx_ops, list) { unsigned long reduction; /* print only active entries & prevent division by zero */ if (!op->frames_abs) continue; seq_printf(m, "rx_op: %03X %-5s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u)", op->nframes); else seq_printf(m, "[%u]", op->nframes); seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' '); if (op->kt_ival1) seq_printf(m, "timeo=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "thr=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# recv %ld (%ld) => reduction: ", op->frames_filtered, op->frames_abs); reduction = 100 - (op->frames_filtered * 100) / op->frames_abs; seq_printf(m, "%s%ld%%\n", (reduction == 100) ? "near " : "", reduction); } list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u) ", op->nframes); else seq_printf(m, "[%u] ", op->nframes); if (op->kt_ival1) seq_printf(m, "t1=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "t2=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); return 0; } #endif /* CONFIG_PROC_FS */ /* * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface * of the given bcm tx op */ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; int err; /* no target device? => exit */ if (!op->ifindex) return; dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ return; } skb = alloc_skb(op->cfsiz + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb_put_data(skb, cf, op->cfsiz); /* send with loopback */ skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); if (!err) op->frames_abs++; op->currframe++; /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; out: dev_put(dev); } /* * bcm_send_to_user - send a BCM message to the userspace * (consisting of bcm_msg_head + x CAN frames) */ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct canfd_frame *frames, int has_timestamp) { struct sk_buff *skb; struct canfd_frame *firstframe; struct sockaddr_can *addr; struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; int err; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) return; skb_put_data(skb, head, sizeof(*head)); if (head->nframes) { /* CAN frames starting here */ firstframe = (struct canfd_frame *)skb_tail_pointer(skb); skb_put_data(skb, frames, datalen); /* * the BCM uses the flags-element of the canfd_frame * structure for internal purposes. This is only * relevant for updates that are generated by the * BCM, where nframes is 1 */ if (head->nframes == 1) firstframe->flags &= BCM_CAN_FLAGS_MASK; } if (has_timestamp) { /* restore rx timestamp */ skb->tstamp = op->rx_stamp; } /* * Put the datagram to the queue so that bcm_recvmsg() can * get it from there. We need to pass the interface index to * bcm_recvmsg(). We pass a whole struct sockaddr_can in skb->cb * containing the interface index. */ sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; err = sock_queue_rcv_skb(sk, skb); if (err < 0) { struct bcm_sock *bo = bcm_sk(sk); kfree_skb(skb); /* don't care about overflows in this statistic */ bo->dropped_usr_msgs++; } } static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { ktime_t ival; if (op->kt_ival1 && op->count) ival = op->kt_ival1; else if (op->kt_ival2) ival = op->kt_ival2; else return false; hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); return true; } static void bcm_tx_start_timer(struct bcm_op *op) { if (bcm_tx_set_expiry(op, &op->timer)) hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); } /* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = TX_EXPIRED; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); } bcm_can_tx(op); } else if (op->kt_ival2) { bcm_can_tx(op); } return bcm_tx_set_expiry(op, &op->timer) ? HRTIMER_RESTART : HRTIMER_NORESTART; } /* * bcm_rx_changed - create a RX_CHANGED notification due to changed content */ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) { struct bcm_msg_head head; /* update statistics */ op->frames_filtered++; /* prevent statistics overflow */ if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; /* this element is not throttled anymore */ data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV); memset(&head, 0, sizeof(head)); head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; head.ival1 = op->ival1; head.ival2 = op->ival2; head.can_id = op->can_id; head.nframes = 1; bcm_send_to_user(op, &head, data, 1); } /* * bcm_rx_update_and_send - process a detected relevant receive content change * 1. update the last received data * 2. send a notification to the user (if possible) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct canfd_frame *lastdata, const struct canfd_frame *rxdata) { memcpy(lastdata, rxdata, op->cfsiz); /* mark as used and throttled by default */ lastdata->flags |= (RX_RECV|RX_THR); /* throttling mode inactive ? */ if (!op->kt_ival2) { /* send RX_CHANGED to the user immediately */ bcm_rx_changed(op, lastdata); return; } /* with active throttling timer we are just done here */ if (hrtimer_active(&op->thrtimer)) return; /* first reception with enabled throttling mode */ if (!op->kt_lastmsg) goto rx_changed_settime; /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS_SOFT); return; } /* the gap was that big, that throttling was not needed here */ rx_changed_settime: bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } /* * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, const struct canfd_frame *rxdata) { struct canfd_frame *cf = op->frames + op->cfsiz * index; struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; int i; /* * no one uses the MSBs of flags for comparison, * so we use it here to detect the first time of reception */ if (!(lcf->flags & RX_RECV)) { /* received data for the first time => send update to user */ bcm_rx_update_and_send(op, lcf, rxdata); return; } /* do a real check in CAN frame data section */ for (i = 0; i < rxdata->len; i += 8) { if ((get_u64(cf, i) & get_u64(rxdata, i)) != (get_u64(cf, i) & get_u64(lcf, i))) { bcm_rx_update_and_send(op, lcf, rxdata); return; } } if (op->flags & RX_CHECK_DLC) { /* do a real check in CAN frame length */ if (rxdata->len != lcf->len) { bcm_rx_update_and_send(op, lcf, rxdata); return; } } } /* * bcm_rx_starttimer - enable timeout monitoring for CAN frame reception */ static void bcm_rx_starttimer(struct bcm_op *op) { if (op->flags & RX_NO_AUTOTIMER) return; if (op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; /* if user wants to be informed, when cyclic CAN-Messages come back */ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { /* clear received CAN frames to indicate 'nothing received' */ memset(op->last_frames, 0, op->nframes * op->cfsiz); } /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); return HRTIMER_NORESTART; } /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { bcm_rx_changed(op, lcf); return 1; } return 0; } /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace */ static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; if (op->nframes > 1) { unsigned int i; /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ updated += bcm_rx_do_flush(op, 0); } return updated; } /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace */ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); if (bcm_rx_thr_flush(op)) { hrtimer_forward_now(hrtimer, op->kt_ival2); return HRTIMER_RESTART; } else { /* rearm throttle handling */ op->kt_lastmsg = 0; return HRTIMER_NORESTART; } } /* * bcm_rx_handler - handle a CAN frame reception */ static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data; unsigned int i; if (op->can_id != rxframe->can_id) return; /* make sure to handle the correct frame type (CAN / CAN FD) */ if (op->flags & CAN_FD_FRAME) { if (!can_is_canfd_skb(skb)) return; } else { if (!can_is_can_skb(skb)) return; } /* disable timeout */ hrtimer_cancel(&op->timer); /* save rx timestamp */ op->rx_stamp = skb->tstamp; /* save originator for recvfrom() */ op->rx_ifindex = skb->dev->ifindex; /* update statistics */ op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ bcm_can_tx(op); return; } if (op->flags & RX_FILTER_ID) { /* the easiest case */ bcm_rx_update_and_send(op, op->last_frames, rxframe); goto rx_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ bcm_rx_cmp_to_index(op, 0, rxframe); goto rx_starttimer; } if (op->nframes > 1) { /* * multiplex compare * * find the first multiplex mask that fits. * Remark: The MUX-mask is stored in index 0 - but only the * first 64 bits of the frame data[] are relevant (CAN FD) */ for (i = 1; i < op->nframes; i++) { if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) == (get_u64(op->frames, 0) & get_u64(op->frames + op->cfsiz * i, 0))) { bcm_rx_cmp_to_index(op, i, rxframe); break; } } } rx_starttimer: bcm_rx_starttimer(op); } /* * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements */ static struct bcm_op *bcm_find_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op; list_for_each_entry(op, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) return op; } return NULL; } static void bcm_free_op_rcu(struct rcu_head *rcu_head) { struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); if ((op->last_frames) && (op->last_frames != &op->last_sframe)) kfree(op->last_frames); kfree(op); } static void bcm_remove_op(struct bcm_op *op) { hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); call_rcu(&op->rcu, bcm_free_op_rcu); } static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { can_rx_unregister(dev_net(dev), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; } else printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device " "mismatch %p %p\n", op->rx_reg_dev, dev); } /* * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops) */ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { /* disable automatic timer on frame reception */ op->flags |= RX_NO_AUTOTIMER; /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save * thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(sock_net(op->sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); list_del(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops) */ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { list_del(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg) */ static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head, int ifindex) { struct bcm_op *op = bcm_find_op(ops, msg_head, ifindex); if (!op) return -EINVAL; /* put current values into msg_head */ msg_head->flags = op->flags; msg_head->count = op->count; msg_head->ival1 = op->ival1; msg_head->ival2 = op->ival2; msg_head->nframes = op->nframes; bcm_send_to_user(op, msg_head, op->frames, 0); return MHSIZ; } /* * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg) */ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; struct canfd_frame *cf; unsigned int i; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; /* check nframes boundaries - we need at least one CAN frame */ if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; /* update CAN frames content */ for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) return err; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } op->flags = msg_head->flags; } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } } else op->frames = &op->sframe; for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (err < 0) goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } /* tx_ops never compare with previous received messages */ op->last_frames = NULL; /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ if (op->nframes != msg_head->nframes) { op->nframes = msg_head->nframes; /* start multiple frame transmission with index 0 */ op->currframe = 0; } /* check flags */ if (op->flags & TX_RESET_MULTI_IDX) { /* start multiple frame transmission with index 0 */ op->currframe = 0; } if (op->flags & SETTIMER) { /* set timer values */ op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero values? */ if (!op->kt_ival1 && !op->kt_ival2) hrtimer_cancel(&op->timer); } if (op->flags & STARTTIMER) { hrtimer_cancel(&op->timer); /* spec: send CAN frame when starting timer */ op->flags |= TX_ANNOUNCE; } if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); if (op->count) op->count--; } if (op->flags & STARTTIMER) bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; free_op: if (op->frames != &op->sframe) kfree(op->frames); kfree(op); return err; } /* * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg) */ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; int do_rx_register; int err = 0; if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) { /* be robust against wrong usage ... */ msg_head->flags |= RX_FILTER_ID; /* ignore trailing garbage */ msg_head->nframes = 0; } /* the first element contains the mux-mask => MAX_NFRAMES + 1 */ if (msg_head->nframes > MAX_NFRAMES + 1) return -EINVAL; if ((msg_head->flags & RX_RTR_FRAME) && ((msg_head->nframes != 1) || (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; if (msg_head->nframes) { /* update CAN frames content */ err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) return err; /* clear last_frames to indicate 'nothing received' */ memset(op->last_frames, 0, msg_head->nframes * op->cfsiz); } op->nframes = msg_head->nframes; op->flags = msg_head->flags; /* Only an update -> do not call can_rx_register() */ do_rx_register = 0; } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; op->can_id = msg_head->can_id; op->nframes = msg_head->nframes; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; if (msg_head->nframes > 1) { /* create array for CAN frames and copy the data */ op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } /* create and init array for received CAN frames */ op->last_frames = kcalloc(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->last_frames) { kfree(op->frames); kfree(op); return -ENOMEM; } } else { op->frames = &op->sframe; op->last_frames = &op->last_sframe; } if (msg_head->nframes) { err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); if (op->last_frames != &op->last_sframe) kfree(op->last_frames); kfree(op); return err; } } /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* ifindex for timeout events w/o previous frame reception */ op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); /* call can_rx_register() */ do_rx_register = 1; } /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */ /* check flags */ if (op->flags & RX_RTR_FRAME) { struct canfd_frame *frame0 = op->frames; /* no timers in RTR-mode */ hrtimer_cancel(&op->thrtimer); hrtimer_cancel(&op->timer); /* * funny feature in RX(!)_SETUP only for RTR-mode: * copy can_id into frame BUT without RTR-flag to * prevent a full-load-loopback-test ... ;-] */ if ((op->flags & TX_CP_CAN_ID) || (frame0->can_id == op->can_id)) frame0->can_id = op->can_id & ~CAN_RTR_FLAG; } else { if (op->flags & SETTIMER) { /* set timer value */ op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero value? */ if (!op->kt_ival1) hrtimer_cancel(&op->timer); /* * In any case cancel the throttle timer, flush * potentially blocked msgs and reset throttle handling */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ if (do_rx_register) { if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (dev) { err = can_rx_register(sock_net(sk), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); } } else err = can_rx_register(sock_net(sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); bcm_remove_op(op); return err; } } return msg_head->nframes * op->cfsiz + MHSIZ; } /* * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg) */ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk, int cfsiz) { struct sk_buff *skb; struct net_device *dev; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; skb = alloc_skb(cfsiz + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; can_skb_reserve(skb); err = memcpy_from_msg(skb_put(skb, cfsiz), msg, cfsiz); if (err < 0) { kfree_skb(skb); return err; } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) { kfree_skb(skb); return -ENODEV; } can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ dev_put(dev); if (err) return err; return cfsiz + MHSIZ; } /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); int ifindex = bo->ifindex; /* default ifindex for this bcm_op */ struct bcm_msg_head msg_head; int cfsiz; int ret; /* read bytes or error codes as return value */ if (!bo->bound) return -ENOTCONN; /* check for valid message length from userspace */ if (size < MHSIZ) return -EINVAL; /* read message head information */ ret = memcpy_from_msg((u8 *)&msg_head, msg, MHSIZ); if (ret < 0) return ret; cfsiz = CFSIZ(msg_head.flags); if ((size - MHSIZ) % cfsiz) return -EINVAL; /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < BCM_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; /* ifindex from sendto() */ ifindex = addr->can_ifindex; if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENODEV; if (dev->type != ARPHRD_CAN) { dev_put(dev); return -ENODEV; } dev_put(dev); } } lock_sock(sk); switch (msg_head.opcode) { case TX_SETUP: ret = bcm_tx_setup(&msg_head, msg, ifindex, sk); break; case RX_SETUP: ret = bcm_rx_setup(&msg_head, msg, ifindex, sk); break; case TX_DELETE: if (bcm_delete_tx_op(&bo->tx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case RX_DELETE: if (bcm_delete_rx_op(&bo->rx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case TX_READ: /* reuse msg_head for the reply to TX_READ */ msg_head.opcode = TX_STATUS; ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex); break; case RX_READ: /* reuse msg_head for the reply to RX_READ */ msg_head.opcode = RX_STATUS; ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex); break; case TX_SEND: /* we need exactly one CAN frame behind the msg head */ if ((msg_head.nframes != 1) || (size != cfsiz + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk, cfsiz); break; default: ret = -EINVAL; break; } release_sock(sk); return ret; } /* * notification handler for netdevice status changes */ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, struct net_device *dev) { struct sock *sk = &bo->sk; struct bcm_op *op; int notify_enodev = 0; if (!net_eq(dev_net(dev), sock_net(sk))) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove device specific receive entries */ list_for_each_entry(op, &bo->rx_ops, list) if (op->rx_reg_dev == dev) bcm_rx_unreg(dev, op); /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; } release_sock(sk); if (notify_enodev) { sk->sk_err = ENODEV; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } break; case NETDEV_DOWN: if (bo->bound && bo->ifindex == dev->ifindex) { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } } } static int bcm_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) return NOTIFY_DONE; if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */ return NOTIFY_DONE; spin_lock(&bcm_notifier_lock); list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) { spin_unlock(&bcm_notifier_lock); bcm_notify(bcm_busy_notifier, msg, dev); spin_lock(&bcm_notifier_lock); } bcm_busy_notifier = NULL; spin_unlock(&bcm_notifier_lock); return NOTIFY_DONE; } /* * initial settings for all BCM sockets to be set at socket creation time */ static int bcm_init(struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); bo->bound = 0; bo->ifindex = 0; bo->dropped_usr_msgs = 0; bo->bcm_proc_read = NULL; INIT_LIST_HEAD(&bo->tx_ops); INIT_LIST_HEAD(&bo->rx_ops); /* set notifier */ spin_lock(&bcm_notifier_lock); list_add_tail(&bo->notifier, &bcm_notifier_list); spin_unlock(&bcm_notifier_lock); return 0; } /* * standard socket functions */ static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; if (!sk) return 0; net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ spin_lock(&bcm_notifier_lock); while (bcm_busy_notifier == bo) { spin_unlock(&bcm_notifier_lock); schedule_timeout_uninterruptible(1); spin_lock(&bcm_notifier_lock); } list_del(&bo->notifier); spin_unlock(&bcm_notifier_lock); lock_sock(sk); #if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ if (net->can.bcmproc_dir && bo->bcm_proc_read) remove_proc_entry(bo->procname, net->can.bcmproc_dir); #endif /* CONFIG_PROC_FS */ list_for_each_entry_safe(op, next, &bo->tx_ops, list) bcm_remove_op(op); list_for_each_entry_safe(op, next, &bo->rx_ops, list) { /* * Don't care if we're bound or not (due to netdev problems) * can_rx_unregister() is always a save thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(net, op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); } synchronize_rcu(); list_for_each_entry_safe(op, next, &bo->rx_ops, list) bcm_remove_op(op); /* remove device reference */ if (bo->bound) { bo->bound = 0; bo->ifindex = 0; } sock_orphan(sk); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, int flags) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); struct net *net = sock_net(sk); int ret = 0; if (len < BCM_MIN_NAMELEN) return -EINVAL; lock_sock(sk); if (bo->bound) { ret = -EISCONN; goto fail; } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { ret = -ENODEV; goto fail; } if (dev->type != ARPHRD_CAN) { dev_put(dev); ret = -ENODEV; goto fail; } bo->ifindex = dev->ifindex; dev_put(dev); } else { /* no interface reference for ifindex = 0 ('any' CAN device) */ bo->ifindex = 0; } #if IS_ENABLED(CONFIG_PROC_FS) if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644, net->can.bcmproc_dir, bcm_proc_show, sk); if (!bo->bcm_proc_read) { ret = -ENOMEM; goto fail; } } #endif /* CONFIG_PROC_FS */ bo->bound = 1; fail: release_sock(sk); return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; int err; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; if (skb->len < size) size = skb->len; err = memcpy_to_msg(msg, skb->data, size); if (err < 0) { skb_free_datagram(sk, skb); return err; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); msg->msg_namelen = BCM_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } skb_free_datagram(sk, skb); return size; } static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, .bind = sock_no_bind, .connect = bcm_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, .ioctl = bcm_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, }; static struct proto bcm_proto __read_mostly = { .name = "CAN_BCM", .owner = THIS_MODULE, .obj_size = sizeof(struct bcm_sock), .init = bcm_init, }; static const struct can_proto bcm_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_BCM, .ops = &bcm_ops, .prot = &bcm_proto, }; static int canbcm_pernet_init(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* create /proc/net/can-bcm directory */ net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ return 0; } static void canbcm_pernet_exit(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* remove /proc/net/can-bcm directory */ if (net->can.bcmproc_dir) remove_proc_entry("can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ } static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, }; static struct notifier_block canbcm_notifier = { .notifier_call = bcm_notifier }; static int __init bcm_module_init(void) { int err; pr_info("can: broadcast manager protocol\n"); err = register_pernet_subsys(&canbcm_pernet_ops); if (err) return err; err = register_netdevice_notifier(&canbcm_notifier); if (err) goto register_notifier_failed; err = can_proto_register(&bcm_can_proto); if (err < 0) { printk(KERN_ERR "can: registration of bcm protocol failed\n"); goto register_proto_failed; } return 0; register_proto_failed: unregister_netdevice_notifier(&canbcm_notifier); register_notifier_failed: unregister_pernet_subsys(&canbcm_pernet_ops); return err; } static void __exit bcm_module_exit(void) { can_proto_unregister(&bcm_can_proto); unregister_netdevice_notifier(&canbcm_notifier); unregister_pernet_subsys(&canbcm_pernet_ops); } module_init(bcm_module_init); module_exit(bcm_module_exit); |
3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. */ #ifndef __INCORE_DOT_H__ #define __INCORE_DOT_H__ #include <linux/fs.h> #include <linux/kobject.h> #include <linux/workqueue.h> #include <linux/dlm.h> #include <linux/buffer_head.h> #include <linux/rcupdate.h> #include <linux/rculist_bl.h> #include <linux/completion.h> #include <linux/rbtree.h> #include <linux/ktime.h> #include <linux/percpu.h> #include <linux/lockref.h> #include <linux/rhashtable.h> #include <linux/mutex.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 struct gfs2_log_operations; struct gfs2_bufdata; struct gfs2_holder; struct gfs2_glock; struct gfs2_quota_data; struct gfs2_trans; struct gfs2_jdesc; struct gfs2_sbd; struct lm_lockops; typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); struct gfs2_log_header_host { u64 lh_sequence; /* Sequence number of this transaction */ u32 lh_flags; /* GFS2_LOG_HEAD_... */ u32 lh_tail; /* Block number of log tail */ u32 lh_blkno; s64 lh_local_total; s64 lh_local_free; s64 lh_local_dinodes; }; /* * Structure of operations that are associated with each * type of element in the log. */ struct gfs2_log_operations { void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); void (*lo_before_scan) (struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass); int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass); void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass); const char *lo_name; }; #define GBF_FULL 1 /** * Clone bitmaps (bi_clone): * * - When a block is freed, we remember the previous state of the block in the * clone bitmap, and only mark the block as free in the real bitmap. * * - When looking for a block to allocate, we check for a free block in the * clone bitmap, and if no clone bitmap exists, in the real bitmap. * * - For allocating a block, we mark it as allocated in the real bitmap, and if * a clone bitmap exists, also in the clone bitmap. * * - At the end of a log_flush, we copy the real bitmap into the clone bitmap * to make the clone bitmap reflect the current allocation state. * (Alternatively, we could remove the clone bitmap.) * * The clone bitmaps are in-core only, and is never written to disk. * * These steps ensure that blocks which have been freed in a transaction cannot * be reallocated in that same transaction. */ struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; unsigned long bi_flags; u32 bi_offset; u32 bi_start; u32 bi_bytes; u32 bi_blocks; }; struct gfs2_rgrpd { struct rb_node rd_node; /* Link with superblock */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ u64 rd_addr; /* grp block disk address */ u64 rd_data0; /* first data location */ u32 rd_length; /* length of rgrp header in fs blocks */ u32 rd_data; /* num of data blocks in rgrp */ u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_requested; /* number of blocks in rd_rstree */ u32 rd_reserved; /* number of reserved blocks */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; struct gfs2_bitmap *rd_bits; struct gfs2_sbd *rd_sbd; struct gfs2_rgrp_lvb *rd_rgl; u32 rd_last_alloc; u32 rd_flags; u32 rd_extfail_pt; /* extent failure point */ #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct mutex rd_mutex; struct rb_root rd_rstree; /* multi-block reservation tree */ }; enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, }; BUFFER_FNS(Pinned, pinned) TAS_BUFFER_FNS(Pinned, pinned) BUFFER_FNS(Escaped, escaped) TAS_BUFFER_FNS(Escaped, escaped) struct gfs2_bufdata { struct buffer_head *bd_bh; struct gfs2_glock *bd_gl; u64 bd_blkno; struct list_head bd_list; struct gfs2_trans *bd_tr; struct list_head bd_ail_st_list; struct list_head bd_ail_gl_list; }; /* * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a * prefix of lock_dlm_ gets awkward. */ #define GDLM_STRNAME_BYTES 25 #define GDLM_LVB_SIZE 32 /* * ls_recover_flags: * * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been * held by failed nodes whose journals need recovery. Those locks should * only be used for journal recovery until the journal recovery is done. * This is set by the dlm recover_prep callback and cleared by the * gfs2_control thread when journal recovery is complete. To avoid * races between recover_prep setting and gfs2_control clearing, recover_spin * is held while changing this bit and reading/writing recover_block * and recover_start. * * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used. * * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing * recovery of all journals before allowing other nodes to mount the fs. * This is cleared when FIRST_MOUNT_DONE is set. * * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished * recovery of all journals, and now allows other nodes to mount the fs. * * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared * BLOCK_LOCKS for the first time. The gfs2_control thread should now * control clearing BLOCK_LOCKS for further recoveries. * * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq. * * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep() * and recover_done(), i.e. set while recover_block == recover_start. */ enum { DFL_BLOCK_LOCKS = 0, DFL_NO_DLM_OPS = 1, DFL_FIRST_MOUNT = 2, DFL_FIRST_MOUNT_DONE = 3, DFL_MOUNT_DONE = 4, DFL_UNMOUNT = 5, DFL_DLM_RECOVERY = 6, }; /* * We are using struct lm_lockname as an rhashtable key. Avoid holes within * the struct; padding at the end is fine. */ struct lm_lockname { u64 ln_number; struct gfs2_sbd *ln_sbd; unsigned int ln_type; }; #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ ((name1)->ln_type == (name2)->ln_type) && \ ((name1)->ln_sbd == (name2)->ln_sbd)) struct gfs2_glock_operations { int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_instantiate) (struct gfs2_glock *gl); int (*go_held)(struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); void (*go_free)(struct gfs2_glock *gl); const int go_subclass; const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ #define GLOF_LVB 2 /* Lock Value Block attached */ #define GLOF_LRU 4 /* LRU managed */ #define GLOF_NONDISK 8 /* not I/O related */ }; enum { GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */ GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */ GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */ GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */ GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */ GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */ GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */ GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */ GFS2_NR_LKSTATS }; struct gfs2_lkstats { u64 stats[GFS2_NR_LKSTATS]; }; enum { /* States */ HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_WAIT = 10, }; struct gfs2_holder { struct list_head gh_list; struct gfs2_glock *gh_gl; struct pid *gh_owner_pid; u16 gh_flags; u16 gh_state; int gh_error; unsigned long gh_iflags; /* HIF_... */ unsigned long gh_ip; }; /* Number of quota types we support */ #define GFS2_MAXQUOTAS 2 struct gfs2_qadata { /* quota allocation data */ /* Quota stuff */ struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS]; struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS]; unsigned int qa_qd_num; int qa_ref; }; /* Resource group multi-block reservation, in order of appearance: Step 1. Function prepares to write, allocates a mb, sets the size hint. Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use Step 4. Bits are assigned from the rgrp based on either the reservation or wherever it can. */ struct gfs2_blkreserv { struct rb_node rs_node; /* node within rd_rstree */ struct gfs2_rgrpd *rs_rgd; u64 rs_start; u32 rs_requested; u32 rs_reserved; /* number of reserved blocks */ }; /* * Allocation parameters * @target: The number of blocks we'd ideally like to allocate * @aflags: The flags (e.g. Orlov flag) * * The intent is to gradually expand this structure over time in * order to give more information, e.g. alignment, min extent size * to the allocation code. */ struct gfs2_alloc_parms { u64 target; u32 min_target; u32 aflags; u64 allowed; }; enum { GLF_LOCK = 1, GLF_INSTANTIATE_NEEDED = 2, /* needs instantiate */ GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DEMOTE_IN_PROGRESS = 5, GLF_DIRTY = 6, GLF_LFLUSH = 7, GLF_INVALIDATE_IN_PROGRESS = 8, GLF_REPLY_PENDING = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, GLF_FREEING = 16, /* Wait for glock to be freed */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ GLF_VERIFY_EVICT = 18, /* iopen glocks only */ }; struct gfs2_glock { unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; struct lockref gl_lockref; /* State fields protected by gl_lockref.lock */ unsigned int gl_state:2, /* Current state */ gl_target:2, /* Target state */ gl_demote_state:2, /* State requested by remote node */ gl_req:2, /* State in last dlm request */ gl_reply:8; /* Last reply from the dlm */ unsigned long gl_demote_time; /* time of first demote request */ long gl_hold_time; struct list_head gl_holders; const struct gfs2_glock_operations *gl_ops; ktime_t gl_dstamp; struct gfs2_lkstats gl_stats; struct dlm_lksb gl_lksb; unsigned long gl_tchange; void *gl_object; struct list_head gl_lru; struct list_head gl_ail_list; atomic_t gl_ail_count; atomic_t gl_revokes; struct delayed_work gl_work; /* For iopen glocks only */ struct { struct delayed_work gl_delete; u64 gl_no_formal_ino; }; struct rcu_head gl_rcu; struct rhash_head gl_node; }; enum { GIF_QD_LOCKED = 1, GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, GIF_DEFERRED_DELETE = 7, }; struct gfs2_inode { struct inode i_inode; u64 i_no_addr; u64 i_no_formal_ino; u64 i_generation; u64 i_eattr; unsigned long i_flags; /* GIF_... */ struct gfs2_glock *i_gl; struct gfs2_holder i_iopen_gh; struct gfs2_qadata *i_qadata; /* quota allocation data */ struct gfs2_holder i_rgd_gh; struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */ u64 i_goal; /* goal block for allocations */ atomic_t i_sizehint; /* hint of the write size */ struct rw_semaphore i_rw_mutex; struct list_head i_ordered; __be64 *i_hash_cache; u32 i_entries; u32 i_diskflags; u8 i_height; u8 i_depth; u16 i_rahead; }; /* * Since i_inode is the first element of struct gfs2_inode, * this is effectively a cast. */ static inline struct gfs2_inode *GFS2_I(struct inode *inode) { return container_of(inode, struct gfs2_inode, i_inode); } static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode) { return inode->i_sb->s_fs_info; } struct gfs2_file { struct mutex f_fl_mutex; struct gfs2_holder f_fl_gh; }; struct gfs2_revoke_replay { struct list_head rr_list; u64 rr_blkno; unsigned int rr_where; }; enum { QDF_CHANGE = 1, QDF_LOCKED = 2, QDF_REFRESH = 3, QDF_QMSG_QUIET = 4, }; struct gfs2_quota_data { struct hlist_bl_node qd_hlist; struct list_head qd_list; struct kqid qd_id; struct gfs2_sbd *qd_sbd; struct lockref qd_lockref; struct list_head qd_lru; unsigned qd_hash; unsigned long qd_flags; /* QDF_... */ s64 qd_change; s64 qd_change_sync; unsigned int qd_slot; unsigned int qd_slot_ref; struct buffer_head *qd_bh; struct gfs2_quota_change *qd_bh_qc; unsigned int qd_bh_count; struct gfs2_glock *qd_gl; struct gfs2_quota_lvb qd_qb; u64 qd_sync_gen; unsigned long qd_last_warn; struct rcu_head qd_rcu; }; enum { TR_TOUCHED = 1, TR_ATTACHED = 2, TR_ONSTACK = 3, }; struct gfs2_trans { unsigned long tr_ip; unsigned int tr_blocks; unsigned int tr_revokes; unsigned int tr_reserved; unsigned long tr_flags; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; struct list_head tr_list; struct list_head tr_databuf; struct list_head tr_buf; unsigned int tr_first; struct list_head tr_ail1_list; struct list_head tr_ail2_list; }; struct gfs2_journal_extent { struct list_head list; unsigned int lblock; /* First logical block */ u64 dblock; /* First disk block */ u64 blocks; }; struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; unsigned int nr_extents; struct work_struct jd_work; struct inode *jd_inode; struct bio *jd_log_bio; unsigned long jd_flags; #define JDF_RECOVERY 1 unsigned int jd_jid; u32 jd_blocks; int jd_recover_error; /* Replay stuff */ unsigned int jd_found_blocks; unsigned int jd_found_revokes; unsigned int jd_replayed_blocks; struct list_head jd_revoke_list; unsigned int jd_replay_tail; u64 jd_no_addr; }; struct gfs2_statfs_change_host { s64 sc_total; s64 sc_free; s64 sc_dinodes; }; #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF #define GFS2_QUOTA_OFF 0 #define GFS2_QUOTA_ACCOUNT 1 #define GFS2_QUOTA_ON 2 #define GFS2_QUOTA_QUIET 3 /* on but not complaining */ #define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED #define GFS2_DATA_WRITEBACK 1 #define GFS2_DATA_ORDERED 2 #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW #define GFS2_ERRORS_WITHDRAW 0 #define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ #define GFS2_ERRORS_RO 2 /* place holder for future feature */ #define GFS2_ERRORS_PANIC 3 struct gfs2_args { char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ unsigned int ar_spectator:1; /* Don't get a journal */ unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ unsigned int ar_debug:1; /* Oops on errors */ unsigned int ar_posix_acl:1; /* Enable posix acls */ unsigned int ar_quota:2; /* off/account/on */ unsigned int ar_suiddir:1; /* suiddir support */ unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */ unsigned int ar_loccookie:1; /* use location based readdir cookies */ s32 ar_commit; /* Commit interval */ s32 ar_statfs_quantum; /* The fast statfs interval */ s32 ar_quota_quantum; /* The quota interval */ s32 ar_statfs_percent; /* The % change to force sync */ }; struct gfs2_tune { spinlock_t gt_spin; unsigned int gt_logd_secs; unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ unsigned int gt_quota_scale_num; /* Numerator */ unsigned int gt_quota_scale_den; /* Denominator */ unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ unsigned int gt_new_files_jdata; unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ unsigned int gt_complain_secs; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; }; enum { SDF_JOURNAL_CHECKED = 0, SDF_JOURNAL_LIVE = 1, SDF_WITHDRAWN = 2, SDF_NOBARRIERS = 3, SDF_NORECOVERY = 4, SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, SDF_RORECOVERY = 7, /* read only recovery */ SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, SDF_FREEZE_INITIATOR = 10, SDF_WITHDRAWING = 11, /* Will withdraw eventually */ SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are withdrawing */ SDF_KILL = 15, SDF_EVICTING = 16, SDF_FROZEN = 17, }; #define GFS2_FSNAME_LEN 256 struct gfs2_inum_host { u64 no_formal_ino; u64 no_addr; }; struct gfs2_sb_host { u32 sb_magic; u32 sb_type; u32 sb_fs_format; u32 sb_multihost_format; u32 sb_bsize; u32 sb_bsize_shift; struct gfs2_inum_host sb_master_dir; struct gfs2_inum_host sb_root_dir; char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; }; /* * lm_mount() return values * * ls_jid - the journal ID this node should use * ls_first - this node is the first to mount the file system * ls_lockspace - lock module's context for this file system * ls_ops - lock module's functions */ struct lm_lockstruct { int ls_jid; unsigned int ls_first; const struct lm_lockops *ls_ops; dlm_lockspace_t *ls_dlm; int ls_recover_jid_done; /* These two are deprecated, */ int ls_recover_jid_status; /* used previously by gfs_controld */ struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ struct dlm_lksb ls_control_lksb; /* control_lock */ char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ char *ls_lvb_bits; spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ uint32_t ls_recover_mount; /* gen in first recover_done cb */ uint32_t ls_recover_start; /* gen in last recover_done cb */ uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ uint32_t ls_recover_size; /* size of recover_submit, recover_result */ uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ uint32_t *ls_recover_result; /* result of last jid recovery */ }; struct gfs2_pcpu_lkstats { /* One struct for each glock type */ struct gfs2_lkstats lkstats[10]; }; /* List of local (per node) statfs inodes */ struct local_statfs_inode { struct list_head si_list; struct inode *si_sc_inode; unsigned int si_jid; /* journal id this statfs inode corresponds to */ }; struct gfs2_sbd { struct super_block *sd_vfs; struct gfs2_pcpu_lkstats __percpu *sd_lkstats; struct kobject sd_kobj; struct completion sd_kobj_unregister; unsigned long sd_flags; /* SDF_... */ struct gfs2_sb_host sd_sb; /* Constants computed on mount */ u32 sd_fsb2bb; u32 sd_fsb2bb_shift; u32 sd_diptrs; /* Number of pointers in a dinode */ u32 sd_inptrs; /* Number of pointers in a indirect block */ u32 sd_ldptrs; /* Number of pointers in a log descriptor block */ u32 sd_jbsize; /* Size of a journaled data block */ u32 sd_hash_bsize; /* sizeof(exhash block) */ u32 sd_hash_bsize_shift; u32 sd_hash_ptrs; /* Number of pointers in a hash block */ u32 sd_qc_per_block; u32 sd_blocks_per_bitmap; u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ u32 sd_max_height; /* Max height of a file's metadata tree */ u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */ struct gfs2_args sd_args; /* Mount arguments */ struct gfs2_tune sd_tune; /* Filesystem tuning structure */ /* Lock Stuff */ struct lm_lockstruct sd_lockstruct; struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; struct work_struct sd_freeze_work; wait_queue_head_t sd_kill_wait; wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; struct completion sd_wdack; struct delayed_work sd_control_work; /* Inode Stuff */ struct dentry *sd_master_dir; struct dentry *sd_root_dir; struct inode *sd_jindex; struct inode *sd_statfs_inode; struct inode *sd_sc_inode; struct list_head sd_sc_inodes_list; struct inode *sd_qc_inode; struct inode *sd_rindex; struct inode *sd_quota_inode; /* StatFS stuff */ spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; int sd_statfs_force_sync; /* Resource group stuff */ int sd_rindex_uptodate; spinlock_t sd_rindex_spin; struct rb_root sd_rindex_tree; unsigned int sd_rgrps; unsigned int sd_max_rg_data; /* Journal index stuff */ struct list_head sd_jindex_list; spinlock_t sd_jindex_spin; struct mutex sd_jindex_mutex; unsigned int sd_journals; struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct buffer_head *sd_sc_bh; struct gfs2_holder sd_qc_gh; struct completion sd_journal_ready; /* Workqueue stuff */ struct workqueue_struct *sd_delete_wq; /* Daemon stuff */ struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; /* Quota stuff */ struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; unsigned int sd_quota_slots; unsigned long *sd_quota_bitmap; spinlock_t sd_bitmap_lock; u64 sd_quota_sync_gen; /* Log stuff */ struct address_space sd_aspace; spinlock_t sd_log_lock; struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; atomic_t sd_log_pinned; unsigned int sd_log_num_revoke; struct list_head sd_log_revokes; struct list_head sd_log_ordered; spinlock_t sd_ordered_lock; atomic_t sd_log_thresh1; atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; atomic_t sd_log_blks_needed; atomic_t sd_log_revokes_available; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; u64 sd_log_sequence; int sd_log_idle; struct rw_semaphore sd_log_flush_lock; atomic_t sd_log_in_flight; wait_queue_head_t sd_log_flush_wait; int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; unsigned int sd_log_tail; unsigned int sd_log_flush_tail; unsigned int sd_log_head; unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; struct list_head sd_ail1_list; struct list_head sd_ail2_list; /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; struct mutex sd_freeze_mutex; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; char sd_proto_name[GFS2_FSNAME_LEN]; /* Debugging crud */ unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ unsigned long sd_glock_dqs_held; }; static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) { gl->gl_stats.stats[which]++; } static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) { const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; preempt_disable(); this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++; preempt_enable(); } struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip) { return GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); } #endif /* __INCORE_DOT_H__ */ |
50 15 29 7 43 28 28 50 50 49 49 19 19 28 27 28 1 27 9 19 |