| 197 197 3754 3752 3143 3134 3752 3459 3134 2190 2192 2192 2192 2805 2806 17 2801 2798 17 3210 3206 503 17 2795 2797 8 8 8 8 8 8 11306 25 15 15 2 15 11273 11306 7898 3781 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | // SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include <trace/events/mmap_lock.h> #include <linux/mm.h> #include <linux/cgroup.h> #include <linux/memcontrol.h> #include <linux/mmap_lock.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/smp.h> #include <linux/trace_events.h> #include <linux/local_lock.h> EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released); #ifdef CONFIG_TRACING /* * Trace calls must be in a separate file, as otherwise there's a circular * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. */ void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { trace_mmap_lock_start_locking(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { trace_mmap_lock_acquire_returned(mm, write, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { trace_mmap_lock_released(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */ #ifdef CONFIG_MMU #ifdef CONFIG_PER_VMA_LOCK static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching) { unsigned int tgt_refcnt = VMA_LOCK_OFFSET; /* Additional refcnt if the vma is attached. */ if (!detaching) tgt_refcnt++; /* * If vma is detached then only vma_mark_attached() can raise the * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached(). */ if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt)) return false; rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_); rcuwait_wait_event(&vma->vm_mm->vma_writer_wait, refcount_read(&vma->vm_refcnt) == tgt_refcnt, TASK_UNINTERRUPTIBLE); lock_acquired(&vma->vmlock_dep_map, _RET_IP_); return true; } static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached) { *detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt); rwsem_release(&vma->vmlock_dep_map, _RET_IP_); } void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq) { bool locked; /* * __vma_enter_locked() returns false immediately if the vma is not * attached, otherwise it waits until refcnt is indicating that vma * is attached with no readers. */ locked = __vma_enter_locked(vma, false); /* * We should use WRITE_ONCE() here because we can have concurrent reads * from the early lockless pessimistic check in vma_start_read(). * We don't really care about the correctness of that early check, but * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. */ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); if (locked) { bool detached; __vma_exit_locked(vma, &detached); WARN_ON_ONCE(detached); /* vma should remain attached */ } } EXPORT_SYMBOL_GPL(__vma_start_write); void vma_mark_detached(struct vm_area_struct *vma) { vma_assert_write_locked(vma); vma_assert_attached(vma); /* * We are the only writer, so no need to use vma_refcount_put(). * The condition below is unlikely because the vma has been already * write-locked and readers can increment vm_refcnt only temporarily * before they check vm_lock_seq, realize the vma is locked and drop * back the vm_refcnt. That is a narrow window for observing a raised * vm_refcnt. */ if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { /* Wait until vma is detached with no readers. */ if (__vma_enter_locked(vma, true)) { bool detached; __vma_exit_locked(vma, &detached); WARN_ON_ONCE(!detached); } } } /* * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to * using mmap_lock. The function should never yield false unlocked result. * False locked result is possible if mm_lock_seq overflows or if vma gets * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. * * IMPORTANT: RCU lock must be held upon entering the function, but upon error * IT IS RELEASED. The caller must handle this correctly. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) { struct mm_struct *other_mm; int oldcnt; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); /* * Check before locking. A race might cause false locked result. * We can use READ_ONCE() for the mm_lock_seq here, and don't need * ACQUIRE semantics, because this is just a lockless check whose result * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) { vma = NULL; goto err; } /* * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. * Acquire fence is required here to avoid reordering against later * vm_lock_seq check and checks inside lock_vma_under_rcu(). */ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, VMA_REF_LIMIT))) { /* return EAGAIN if vma got detached from under us */ vma = oldcnt ? NULL : ERR_PTR(-EAGAIN); goto err; } rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); if (unlikely(vma->vm_mm != mm)) goto err_unstable; /* * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq * modification invalidates all existing locks. * * We must use ACQUIRE semantics for the mm_lock_seq so that if we are * racing with vma_end_write_all(), we only start reading from the VMA * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { vma_refcount_put(vma); vma = NULL; goto err; } return vma; err: rcu_read_unlock(); return vma; err_unstable: /* * If vma got attached to another mm from under us, that mm is not * stable and can be freed in the narrow window after vma->vm_refcnt * is dropped and before rcuwait_wake_up(mm) is called. Grab it before * releasing vma->vm_refcnt. */ other_mm = vma->vm_mm; /* use a copy as vma can be freed after we drop vm_refcnt */ /* __mmdrop() is a heavy operation, do it after dropping RCU lock. */ rcu_read_unlock(); mmgrab(other_mm); vma_refcount_put(vma); mmdrop(other_mm); return NULL; } /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the * function returns NULL. */ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) { MA_STATE(mas, &mm->mm_mt, address, address); struct vm_area_struct *vma; retry: rcu_read_lock(); vma = mas_walk(&mas); if (!vma) { rcu_read_unlock(); goto inval; } vma = vma_start_read(mm, vma); if (IS_ERR_OR_NULL(vma)) { /* Check if the VMA got isolated after we found it */ if (PTR_ERR(vma) == -EAGAIN) { count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ goto retry; } /* Failed to lock the VMA */ goto inval; } /* * At this point, we have a stable reference to a VMA: The VMA is * locked and we know it hasn't already been isolated. * From here on, we can access the VMA without worrying about which * fields are accessible for RCU readers. */ rcu_read_unlock(); /* Check if the vma we locked is the right one. */ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { vma_end_read(vma); goto inval; } return vma; inval: count_vm_vma_lock_event(VMA_LOCK_ABORT); return NULL; } static struct vm_area_struct *lock_next_vma_under_mmap_lock(struct mm_struct *mm, struct vma_iterator *vmi, unsigned long from_addr) { struct vm_area_struct *vma; int ret; ret = mmap_read_lock_killable(mm); if (ret) return ERR_PTR(ret); /* Lookup the vma at the last position again under mmap_read_lock */ vma_iter_set(vmi, from_addr); vma = vma_next(vmi); if (vma) { /* Very unlikely vma->vm_refcnt overflow case */ if (unlikely(!vma_start_read_locked(vma))) vma = ERR_PTR(-EAGAIN); } mmap_read_unlock(mm); return vma; } struct vm_area_struct *lock_next_vma(struct mm_struct *mm, struct vma_iterator *vmi, unsigned long from_addr) { struct vm_area_struct *vma; unsigned int mm_wr_seq; bool mmap_unlocked; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu read lock held"); retry: /* Start mmap_lock speculation in case we need to verify the vma later */ mmap_unlocked = mmap_lock_speculate_try_begin(mm, &mm_wr_seq); vma = vma_next(vmi); if (!vma) return NULL; vma = vma_start_read(mm, vma); if (IS_ERR_OR_NULL(vma)) { /* * Retry immediately if the vma gets detached from under us. * Infinite loop should not happen because the vma we find will * have to be constantly knocked out from under us. */ if (PTR_ERR(vma) == -EAGAIN) { /* reset to search from the last address */ rcu_read_lock(); vma_iter_set(vmi, from_addr); goto retry; } goto fallback; } /* Verify the vma is not behind the last search position. */ if (unlikely(from_addr >= vma->vm_end)) goto fallback_unlock; /* * vma can be ahead of the last search position but we need to verify * it was not shrunk after we found it and another vma has not been * installed ahead of it. Otherwise we might observe a gap that should * not be there. */ if (from_addr < vma->vm_start) { /* Verify only if the address space might have changed since vma lookup. */ if (!mmap_unlocked || mmap_lock_speculate_retry(mm, mm_wr_seq)) { vma_iter_set(vmi, from_addr); if (vma != vma_next(vmi)) goto fallback_unlock; } } return vma; fallback_unlock: rcu_read_unlock(); vma_end_read(vma); fallback: vma = lock_next_vma_under_mmap_lock(mm, vmi, from_addr); rcu_read_lock(); /* Reinitialize the iterator after re-entering rcu read section */ vma_iter_set(vmi, IS_ERR_OR_NULL(vma) ? from_addr : vma->vm_end); return vma; } #endif /* CONFIG_PER_VMA_LOCK */ #ifdef CONFIG_LOCK_MM_AND_FIND_VMA #include <linux/extable.h> static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { if (likely(mmap_read_trylock(mm))) return true; if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_read_lock_killable(mm); } static inline bool mmap_upgrade_trylock(struct mm_struct *mm) { /* * We don't have this operation yet. * * It should be easy enough to do: it's basically a * atomic_long_try_cmpxchg_acquire() * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but * it also needs the proper lockdep magic etc. */ return false; } static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { mmap_read_unlock(mm); if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_write_lock_killable(mm); } /* * Helper for page fault handling. * * This is kind of equivalent to "mmap_read_lock()" followed * by "find_extend_vma()", except it's a lot more careful about * the locking (and will drop the lock on failure). * * For example, if we have a kernel bug that causes a page * fault, we don't want to just use mmap_read_lock() to get * the mm lock, because that would deadlock if the bug were * to happen while we're holding the mm lock for writing. * * So this checks the exception tables on kernel faults in * order to only do this all for instructions that are actually * expected to fault. * * We can also actually take the mm lock for writing if we * need to extend the vma, which helps the VM layer a lot. */ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long addr, struct pt_regs *regs) { struct vm_area_struct *vma; if (!get_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (likely(vma && (vma->vm_start <= addr))) return vma; /* * Well, dang. We might still be successful, but only * if we can extend a vma to do so. */ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { mmap_read_unlock(mm); return NULL; } /* * We can try to upgrade the mmap lock atomically, * in which case we can continue to use the vma * we already looked up. * * Otherwise we'll have to drop the mmap lock and * re-take it, and also look up the vma again, * re-checking it. */ if (!mmap_upgrade_trylock(mm)) { if (!upgrade_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (!vma) goto fail; if (vma->vm_start <= addr) goto success; if (!(vma->vm_flags & VM_GROWSDOWN)) goto fail; } if (expand_stack_locked(vma, addr)) goto fail; success: mmap_write_downgrade(mm); return vma; fail: mmap_write_unlock(mm); return NULL; } #endif /* CONFIG_LOCK_MM_AND_FIND_VMA */ #else /* CONFIG_MMU */ /* * At least xtensa ends up having protection faults even with no * MMU.. No stack expansion, at least. */ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long addr, struct pt_regs *regs) { struct vm_area_struct *vma; mmap_read_lock(mm); vma = vma_lookup(mm, addr); if (!vma) mmap_read_unlock(mm); return vma; } #endif /* CONFIG_MMU */ |
| 14 4 4 2 2 13 1 11 12 1 11 10 10 10 7 10 2 2 2 1 6 3 3 3 3 1 2 3 1 2 3 3 3 3 5 5 2 2 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_mq.c Classful multiqueue dummy scheduler * * Copyright (c) 2009 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> struct mq_sched { struct Qdisc **qdiscs; }; static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd) { struct net_device *dev = qdisc_dev(sch); struct tc_mq_qopt_offload opt = { .command = cmd, .handle = sch->handle, }; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt); } static int mq_offload_stats(struct Qdisc *sch) { struct tc_mq_qopt_offload opt = { .command = TC_MQ_STATS, .handle = sch->handle, .stats = { .bstats = &sch->bstats, .qstats = &sch->qstats, }, }; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt); } static void mq_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); unsigned int ntx; mq_offload(sch, TC_MQ_DESTROY); if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } static int mq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct netdev_queue *dev_queue; struct Qdisc *qdisc; unsigned int ntx; if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1)), extack); if (!qdisc) return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; mq_offload(sch, TC_MQ_CREATE); return 0; } static void mq_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); struct Qdisc *qdisc, *old; unsigned int ntx; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); #endif } kfree(priv->qdiscs); priv->qdiscs = NULL; } static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int ntx; sch->q.qlen = 0; gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs * to account for all, none, or a mix of locked and unlocked child * qdiscs. Percpu stats are added to counters in-band and locking * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, &qdisc->bstats, false); gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, &qdisc->qstats); sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } return mq_offload_stats(sch); } static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; return netdev_get_tx_queue(dev, ntx); } static struct netdev_queue *mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) { return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); struct tc_mq_qopt_offload graft_offload; struct net_device *dev = qdisc_dev(sch); if (dev->flags & IFF_UP) dev_deactivate(dev); *old = dev_graft_qdisc(dev_queue, new); if (new) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); graft_offload.handle = sch->handle; graft_offload.graft_params.queue = cl - 1; graft_offload.graft_params.child_handle = new ? new->handle : 0; graft_offload.command = TC_MQ_GRAFT; qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old, TC_SETUP_QDISC_MQ, &graft_offload, extack); return 0; } static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mq_find(struct Qdisc *sch, u32 classid) { unsigned int ntx = TC_H_MIN(classid); if (!mq_queue_get(sch, ntx)) return 0; return ntx; } static int mq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct net_device *dev = qdisc_dev(sch); unsigned int ntx; if (arg->stop) return; arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; } } static const struct Qdisc_class_ops mq_class_ops = { .select_queue = mq_select_queue, .graft = mq_graft, .leaf = mq_leaf, .find = mq_find, .walk = mq_walk, .dump = mq_dump_class, .dump_stats = mq_dump_class_stats, }; struct Qdisc_ops mq_qdisc_ops __read_mostly = { .cl_ops = &mq_class_ops, .id = "mq", .priv_size = sizeof(struct mq_sched), .init = mq_init, .destroy = mq_destroy, .attach = mq_attach, .change_real_num_tx = mq_change_real_num_tx, .dump = mq_dump, .owner = THIS_MODULE, }; |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | // SPDX-License-Identifier: GPL-2.0+ /* * comedi/drivers/dt2817.c * Hardware driver for Data Translation DT2817 * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1998 David A. Schleef <ds@schleef.org> */ /* * Driver: dt2817 * Description: Data Translation DT2817 * Author: ds * Status: complete * Devices: [Data Translation] DT2817 (dt2817) * * A very simple digital I/O card. Four banks of 8 lines, each bank * is configurable for input or output. One wonders why it takes a * 50 page manual to describe this thing. * * The driver (which, btw, is much less than 50 pages) has 1 subdevice * with 32 channels, configurable in groups of 8. * * Configuration options: * [0] - I/O port base base address */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #define DT2817_CR 0 #define DT2817_DATA 1 static int dt2817_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int oe = 0; unsigned int mask; int ret; if (chan < 8) mask = 0x000000ff; else if (chan < 16) mask = 0x0000ff00; else if (chan < 24) mask = 0x00ff0000; else mask = 0xff000000; ret = comedi_dio_insn_config(dev, s, insn, data, mask); if (ret) return ret; if (s->io_bits & 0x000000ff) oe |= 0x1; if (s->io_bits & 0x0000ff00) oe |= 0x2; if (s->io_bits & 0x00ff0000) oe |= 0x4; if (s->io_bits & 0xff000000) oe |= 0x8; outb(oe, dev->iobase + DT2817_CR); return insn->n; } static int dt2817_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned long iobase = dev->iobase + DT2817_DATA; unsigned int mask; unsigned int val; mask = comedi_dio_update_state(s, data); if (mask) { if (mask & 0x000000ff) outb(s->state & 0xff, iobase + 0); if (mask & 0x0000ff00) outb((s->state >> 8) & 0xff, iobase + 1); if (mask & 0x00ff0000) outb((s->state >> 16) & 0xff, iobase + 2); if (mask & 0xff000000) outb((s->state >> 24) & 0xff, iobase + 3); } val = inb(iobase + 0); val |= (inb(iobase + 1) << 8); val |= (inb(iobase + 2) << 16); val |= (inb(iobase + 3) << 24); data[1] = val; return insn->n; } static int dt2817_attach(struct comedi_device *dev, struct comedi_devconfig *it) { int ret; struct comedi_subdevice *s; ret = comedi_request_region(dev, it->options[0], 0x5); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 1); if (ret) return ret; s = &dev->subdevices[0]; s->n_chan = 32; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->range_table = &range_digital; s->maxdata = 1; s->insn_bits = dt2817_dio_insn_bits; s->insn_config = dt2817_dio_insn_config; s->state = 0; outb(0, dev->iobase + DT2817_CR); return 0; } static struct comedi_driver dt2817_driver = { .driver_name = "dt2817", .module = THIS_MODULE, .attach = dt2817_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(dt2817_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi low-level driver"); MODULE_LICENSE("GPL"); |
| 66 34 3 2 3 3 3 36 3 8 6 4 4 3 4 2 2 3 2 2 2 3 3 3 7 8 2 10 1 4 3 2 8 1 3 2 2 2 7 1 4 2 9 1 3 3 2 7 1 2 2 2 9 1 4 4 3 5 1 3 1 7 1 2 2 2 9 1 3 2 3 2 36 36 2 3 3 3 1 1 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 3 4 4 3 3 36 36 36 2 34 1 35 36 23 39 31 29 2 30 1 29 2 6 30 22 21 1 14 8 14 8 28 11 16 15 16 15 44 4 2 1 2 2 1 2 4 1 3 136 136 139 22 22 47 42 41 1 1 40 2 41 1 40 2 40 2 41 1 41 1 41 1 34 1 2 2 2 35 7 11 1 2 4 1 3 5 22 22 22 22 22 21 20 19 1 20 17 17 1 16 35 35 3 32 1 34 5 5 3 5 33 1 36 36 36 35 1 47 47 5 4 1 35 4 37 2 36 33 2 37 1 1 36 2 3 30 1 36 2 34 33 2 1 34 11 10 1 66 2 56 1 8 1 7 64 64 64 64 57 4 133 71 130 125 14 1 1 1 1 131 5 2008 2006 51 48 24 4 3 2 2 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 | // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/macsec.c - MACsec device * * Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net> */ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/module.h> #include <crypto/aead.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/refcount.h> #include <net/genetlink.h> #include <net/sock.h> #include <net/gro_cells.h> #include <net/macsec.h> #include <net/dst_metadata.h> #include <net/netdev_lock.h> #include <linux/phy.h> #include <linux/byteorder/generic.h> #include <linux/if_arp.h> #include <uapi/linux/if_macsec.h> /* SecTAG length = macsec_eth_header without the optional SCI */ #define MACSEC_TAG_LEN 6 struct macsec_eth_header { struct ethhdr eth; /* SecTAG */ u8 tci_an; #if defined(__LITTLE_ENDIAN_BITFIELD) u8 short_length:6, unused:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 unused:2, short_length:6; #else #error "Please fix <asm/byteorder.h>" #endif __be32 packet_number; u8 secure_channel_id[8]; /* optional */ } __packed; /* minimum secure data length deemed "not short", see IEEE 802.1AE-2006 9.7 */ #define MIN_NON_SHORT_LEN 48 #define GCM_AES_IV_LEN 12 #define for_each_rxsc(secy, sc) \ for (sc = rcu_dereference_bh(secy->rx_sc); \ sc; \ sc = rcu_dereference_bh(sc->next)) #define for_each_rxsc_rtnl(secy, sc) \ for (sc = rtnl_dereference(secy->rx_sc); \ sc; \ sc = rtnl_dereference(sc->next)) #define pn_same_half(pn1, pn2) (!(((pn1) >> 31) ^ ((pn2) >> 31))) struct gcm_iv_xpn { union { u8 short_secure_channel_id[4]; ssci_t ssci; }; __be64 pn; } __packed; struct gcm_iv { union { u8 secure_channel_id[8]; sci_t sci; }; __be32 pn; }; #define MACSEC_VALIDATE_DEFAULT MACSEC_VALIDATE_STRICT struct pcpu_secy_stats { struct macsec_dev_stats stats; struct u64_stats_sync syncp; }; /** * struct macsec_dev - private data * @secy: SecY config * @real_dev: pointer to underlying netdevice * @dev_tracker: refcount tracker for @real_dev reference * @stats: MACsec device stats * @secys: linked list of SecY's on the underlying device * @gro_cells: pointer to the Generic Receive Offload cell * @offload: status of offloading on the MACsec device * @insert_tx_tag: when offloading, device requires to insert an * additional tag */ struct macsec_dev { struct macsec_secy secy; struct net_device *real_dev; netdevice_tracker dev_tracker; struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; enum macsec_offload offload; bool insert_tx_tag; }; /** * struct macsec_rxh_data - rx_handler private argument * @secys: linked list of SecY's on this underlying device */ struct macsec_rxh_data { struct list_head secys; }; static struct macsec_dev *macsec_priv(const struct net_device *dev) { return (struct macsec_dev *)netdev_priv(dev); } static struct macsec_rxh_data *macsec_data_rcu(const struct net_device *dev) { return rcu_dereference_bh(dev->rx_handler_data); } static struct macsec_rxh_data *macsec_data_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->rx_handler_data); } struct macsec_cb { struct aead_request *req; union { struct macsec_tx_sa *tx_sa; struct macsec_rx_sa *rx_sa; }; u8 assoc_num; bool valid; bool has_sci; }; static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr) { struct macsec_rx_sa *sa = rcu_dereference_bh(ptr); if (!sa || !sa->active) return NULL; if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; } static void free_rx_sc_rcu(struct rcu_head *head) { struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head); free_percpu(rx_sc->stats); kfree(rx_sc); } static struct macsec_rx_sc *macsec_rxsc_get(struct macsec_rx_sc *sc) { return refcount_inc_not_zero(&sc->refcnt) ? sc : NULL; } static void macsec_rxsc_put(struct macsec_rx_sc *sc) { if (refcount_dec_and_test(&sc->refcnt)) call_rcu(&sc->rcu_head, free_rx_sc_rcu); } static void free_rxsa(struct rcu_head *head) { struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); kfree(sa); } static void macsec_rxsa_put(struct macsec_rx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_rxsa); } static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr) { struct macsec_tx_sa *sa = rcu_dereference_bh(ptr); if (!sa || !sa->active) return NULL; if (!refcount_inc_not_zero(&sa->refcnt)) return NULL; return sa; } static void free_txsa(struct rcu_head *head) { struct macsec_tx_sa *sa = container_of(head, struct macsec_tx_sa, rcu); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); kfree(sa); } static void macsec_txsa_put(struct macsec_tx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) call_rcu(&sa->rcu, free_txsa); } static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct macsec_cb) > sizeof(skb->cb)); return (struct macsec_cb *)skb->cb; } #define MACSEC_PORT_SCB (0x0000) #define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL) #define MACSEC_UNDEF_SSCI ((__force ssci_t)0xffffffff) #define MACSEC_GCM_AES_128_SAK_LEN 16 #define MACSEC_GCM_AES_256_SAK_LEN 32 #define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN #define DEFAULT_XPN false #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 #define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) static sci_t make_sci(const u8 *addr, __be16 port) { sci_t sci; memcpy(&sci, addr, ETH_ALEN); memcpy(((char *)&sci) + ETH_ALEN, &port, sizeof(port)); return sci; } static sci_t macsec_active_sci(struct macsec_secy *secy) { struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc); /* Case single RX SC */ if (rx_sc && !rcu_dereference_bh(rx_sc->next)) return (rx_sc->active) ? rx_sc->sci : 0; /* Case no RX SC or multiple */ else return 0; } static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, struct macsec_rxh_data *rxd) { struct macsec_dev *macsec; sci_t sci = 0; /* SC = 1 */ if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); /* SC = 0; ES = 0 */ } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && (list_is_singular(&rxd->secys))) { /* Only one SECY should exist on this scenario */ macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, secys); if (macsec) return macsec_active_sci(&macsec->secy); } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); } return sci; } static unsigned int macsec_sectag_len(bool sci_present) { return MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); } static unsigned int macsec_hdr_len(bool sci_present) { return macsec_sectag_len(sci_present) + ETH_HLEN; } static unsigned int macsec_extra_len(bool sci_present) { return macsec_sectag_len(sci_present) + sizeof(__be16); } /* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */ static void macsec_fill_sectag(struct macsec_eth_header *h, const struct macsec_secy *secy, u32 pn, bool sci_present) { const struct macsec_tx_sc *tx_sc = &secy->tx_sc; memset(&h->tci_an, 0, macsec_sectag_len(sci_present)); h->eth.h_proto = htons(ETH_P_MACSEC); if (sci_present) { h->tci_an |= MACSEC_TCI_SC; memcpy(&h->secure_channel_id, &secy->sci, sizeof(h->secure_channel_id)); } else { if (tx_sc->end_station) h->tci_an |= MACSEC_TCI_ES; if (tx_sc->scb) h->tci_an |= MACSEC_TCI_SCB; } h->packet_number = htonl(pn); /* with GCM, C/E clear for !encrypt, both set for encrypt */ if (tx_sc->encrypt) h->tci_an |= MACSEC_TCI_CONFID; else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) h->tci_an |= MACSEC_TCI_C; h->tci_an |= tx_sc->encoding_sa; } static void macsec_set_shortlen(struct macsec_eth_header *h, size_t data_len) { if (data_len < MIN_NON_SHORT_LEN) h->short_length = data_len; } /* Checks if a MACsec interface is being offloaded to an hardware engine */ static bool macsec_is_offloaded(struct macsec_dev *macsec) { if (macsec->offload == MACSEC_OFFLOAD_MAC || macsec->offload == MACSEC_OFFLOAD_PHY) return true; return false; } /* Checks if underlying layers implement MACsec offloading functions. */ static bool macsec_check_offload(enum macsec_offload offload, struct macsec_dev *macsec) { if (!macsec || !macsec->real_dev) return false; if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev && macsec->real_dev->phydev->macsec_ops; else if (offload == MACSEC_OFFLOAD_MAC) return macsec->real_dev->features & NETIF_F_HW_MACSEC && macsec->real_dev->macsec_ops; return false; } static const struct macsec_ops *__macsec_get_ops(enum macsec_offload offload, struct macsec_dev *macsec, struct macsec_context *ctx) { if (ctx) { memset(ctx, 0, sizeof(*ctx)); ctx->offload = offload; if (offload == MACSEC_OFFLOAD_PHY) ctx->phydev = macsec->real_dev->phydev; else if (offload == MACSEC_OFFLOAD_MAC) ctx->netdev = macsec->real_dev; } if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev->macsec_ops; else return macsec->real_dev->macsec_ops; } /* Returns a pointer to the MACsec ops struct if any and updates the MACsec * context device reference if provided. */ static const struct macsec_ops *macsec_get_ops(struct macsec_dev *macsec, struct macsec_context *ctx) { if (!macsec_check_offload(macsec->offload, macsec)) return NULL; return __macsec_get_ops(macsec->offload, macsec, ctx); } /* validate MACsec packet according to IEEE 802.1AE-2018 9.12 */ static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len, bool xpn) { struct macsec_eth_header *h = (struct macsec_eth_header *)skb->data; int len = skb->len - 2 * ETH_ALEN; int extra_len = macsec_extra_len(!!(h->tci_an & MACSEC_TCI_SC)) + icv_len; /* a) It comprises at least 17 octets */ if (skb->len <= 16) return false; /* b) MACsec EtherType: already checked */ /* c) V bit is clear */ if (h->tci_an & MACSEC_TCI_VERSION) return false; /* d) ES or SCB => !SC */ if ((h->tci_an & MACSEC_TCI_ES || h->tci_an & MACSEC_TCI_SCB) && (h->tci_an & MACSEC_TCI_SC)) return false; /* e) Bits 7 and 8 of octet 4 of the SecTAG are clear */ if (h->unused) return false; /* rx.pn != 0 if not XPN (figure 10-5 with 802.11AEbw-2013 amendment) */ if (!h->packet_number && !xpn) return false; /* length check, f) g) h) i) */ if (h->short_length) return len == extra_len + h->short_length; return len >= extra_len + MIN_NON_SHORT_LEN; } #define MACSEC_NEEDED_HEADROOM (macsec_extra_len(true)) #define MACSEC_NEEDED_TAILROOM MACSEC_STD_ICV_LEN static void macsec_fill_iv_xpn(unsigned char *iv, ssci_t ssci, u64 pn, salt_t salt) { struct gcm_iv_xpn *gcm_iv = (struct gcm_iv_xpn *)iv; gcm_iv->ssci = ssci ^ salt.ssci; gcm_iv->pn = cpu_to_be64(pn) ^ salt.pn; } static void macsec_fill_iv(unsigned char *iv, sci_t sci, u32 pn) { struct gcm_iv *gcm_iv = (struct gcm_iv *)iv; gcm_iv->sci = sci; gcm_iv->pn = htonl(pn); } static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) { return (struct macsec_eth_header *)skb_mac_header(skb); } static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { pr_debug("PN wrapped, transitioning to !oper\n"); tx_sa->active = false; if (secy->protect_frames) secy->operational = false; } void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { spin_lock_bh(&tx_sa->lock); __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); } EXPORT_SYMBOL_GPL(macsec_pn_wrapped); static pn_t tx_sa_update_pn(struct macsec_tx_sa *tx_sa, struct macsec_secy *secy) { pn_t pn; spin_lock_bh(&tx_sa->lock); pn = tx_sa->next_pn_halves; if (secy->xpn) tx_sa->next_pn++; else tx_sa->next_pn_halves.lower++; if (tx_sa->next_pn == 0) __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); return pn; } static void macsec_encrypt_finish(struct sk_buff *skb, struct net_device *dev) { struct macsec_dev *macsec = netdev_priv(dev); skb->dev = macsec->real_dev; skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; } static unsigned int macsec_msdu_len(struct sk_buff *skb) { struct macsec_dev *macsec = macsec_priv(skb->dev); struct macsec_secy *secy = &macsec->secy; bool sci_present = macsec_skb_cb(skb)->has_sci; return skb->len - macsec_hdr_len(sci_present) - secy->icv_len; } static void macsec_count_tx(struct sk_buff *skb, struct macsec_tx_sc *tx_sc, struct macsec_tx_sa *tx_sa) { unsigned int msdu_len = macsec_msdu_len(skb); struct pcpu_tx_sc_stats *txsc_stats = this_cpu_ptr(tx_sc->stats); u64_stats_update_begin(&txsc_stats->syncp); if (tx_sc->encrypt) { txsc_stats->stats.OutOctetsEncrypted += msdu_len; txsc_stats->stats.OutPktsEncrypted++; this_cpu_inc(tx_sa->stats->OutPktsEncrypted); } else { txsc_stats->stats.OutOctetsProtected += msdu_len; txsc_stats->stats.OutPktsProtected++; this_cpu_inc(tx_sa->stats->OutPktsProtected); } u64_stats_update_end(&txsc_stats->syncp); } static void count_tx(struct net_device *dev, int ret, int len) { if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) dev_sw_netstats_tx_add(dev, 1, len); } static void macsec_encrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_tx_sa *sa = macsec_skb_cb(skb)->tx_sa; int len, ret; aead_request_free(macsec_skb_cb(skb)->req); rcu_read_lock_bh(); macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa); /* packet is encrypted/protected so tx_bytes must be calculated */ len = macsec_msdu_len(skb) + 2 * ETH_ALEN; macsec_encrypt_finish(skb, dev); ret = dev_queue_xmit(skb); count_tx(dev, ret, len); rcu_read_unlock_bh(); macsec_txsa_put(sa); dev_put(dev); } static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, struct scatterlist **sg, int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; void *tmp; size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm); iv_offset = size; size += GCM_AES_IV_LEN; size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) return NULL; *iv = (unsigned char *)(tmp + iv_offset); *sg = (struct scatterlist *)(tmp + sg_offset); req = tmp; aead_request_set_tfm(req, tfm); return req; } static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct net_device *dev) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; size_t unprotected_len; struct aead_request *req; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); bool sci_present; pn_t pn; secy = &macsec->secy; tx_sc = &secy->tx_sc; /* 10.5.1 TX SA assignment */ tx_sa = macsec_txsa_get(tx_sc->sa[tx_sc->encoding_sa]); if (!tx_sa) { secy->operational = false; kfree_skb(skb); return ERR_PTR(-EINVAL); } if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM || skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) { struct sk_buff *nskb = skb_copy_expand(skb, MACSEC_NEEDED_HEADROOM, MACSEC_NEEDED_TAILROOM, GFP_ATOMIC); if (likely(nskb)) { consume_skb(skb); skb = nskb; } else { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } } else { skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { macsec_txsa_put(tx_sa); return ERR_PTR(-ENOMEM); } } unprotected_len = skb->len; eth = eth_hdr(skb); sci_present = macsec_send_sci(secy); hh = skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); if (pn.full64 == 0) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOLINK); } macsec_fill_sectag(hh, secy, pn.lower, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) { struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsTooLong++; u64_stats_update_end(&secy_stats->syncp); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-EINVAL); } ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } if (secy->xpn) macsec_fill_iv_xpn(iv, tx_sa->ssci, pn.full64, tx_sa->key.salt); else macsec_fill_iv(iv, secy->sci, pn.lower); sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } if (tx_sc->encrypt) { int len = skb->len - macsec_hdr_len(sci_present) - secy->icv_len; aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(sci_present)); } else { aead_request_set_crypt(req, sg, sg, 0, iv); aead_request_set_ad(req, skb->len - secy->icv_len); } macsec_skb_cb(skb)->req = req; macsec_skb_cb(skb)->tx_sa = tx_sa; macsec_skb_cb(skb)->has_sci = sci_present; aead_request_set_callback(req, 0, macsec_encrypt_done, skb); dev_hold(skb->dev); ret = crypto_aead_encrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { dev_put(skb->dev); kfree_skb(skb); aead_request_free(req); macsec_txsa_put(tx_sa); return ERR_PTR(-EINVAL); } dev_put(skb->dev); aead_request_free(req); macsec_txsa_put(tx_sa); return skb; } static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u32 pn) { struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct pcpu_rx_sc_stats *rxsc_stats = this_cpu_ptr(rx_sa->sc->stats); struct macsec_eth_header *hdr = macsec_ethhdr(skb); u32 lowest_pn = 0; spin_lock(&rx_sa->lock); if (rx_sa->next_pn_halves.lower >= secy->replay_window) lowest_pn = rx_sa->next_pn_halves.lower - secy->replay_window; /* Now perform replay protection check again * (see IEEE 802.1AE-2006 figure 10-5) */ if (secy->replay_protect && pn < lowest_pn && (!secy->xpn || pn_same_half(pn, lowest_pn))) { spin_unlock(&rx_sa->lock); u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_dropped); return false; } if (secy->validate_frames != MACSEC_VALIDATE_DISABLED) { unsigned int msdu_len = macsec_msdu_len(skb); u64_stats_update_begin(&rxsc_stats->syncp); if (hdr->tci_an & MACSEC_TCI_E) rxsc_stats->stats.InOctetsDecrypted += msdu_len; else rxsc_stats->stats.InOctetsValidated += msdu_len; u64_stats_update_end(&rxsc_stats->syncp); } if (!macsec_skb_cb(skb)->valid) { spin_unlock(&rx_sa->lock); /* 10.6.5 */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotValid++; u64_stats_update_end(&rxsc_stats->syncp); this_cpu_inc(rx_sa->stats->InPktsNotValid); DEV_STATS_INC(secy->netdev, rx_errors); return false; } u64_stats_update_begin(&rxsc_stats->syncp); if (secy->validate_frames == MACSEC_VALIDATE_CHECK) { rxsc_stats->stats.InPktsInvalid++; this_cpu_inc(rx_sa->stats->InPktsInvalid); } else if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsUnchecked++; } u64_stats_update_end(&rxsc_stats->syncp); } else { u64_stats_update_begin(&rxsc_stats->syncp); if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsOK++; this_cpu_inc(rx_sa->stats->InPktsOK); } u64_stats_update_end(&rxsc_stats->syncp); // Instead of "pn >=" - to support pn overflow in xpn if (pn + 1 > rx_sa->next_pn_halves.lower) { rx_sa->next_pn_halves.lower = pn + 1; } else if (secy->xpn && !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { rx_sa->next_pn_halves.upper++; rx_sa->next_pn_halves.lower = pn + 1; } spin_unlock(&rx_sa->lock); } return true; } static void macsec_reset_skb(struct sk_buff *skb, struct net_device *dev) { skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); } static void macsec_finalize_skb(struct sk_buff *skb, u8 icv_len, u8 hdr_len) { skb->ip_summed = CHECKSUM_NONE; memmove(skb->data + hdr_len, skb->data, 2 * ETH_ALEN); skb_pull(skb, hdr_len); pskb_trim_unique(skb, skb->len - icv_len); } static void count_rx(struct net_device *dev, int len) { dev_sw_netstats_rx_add(dev, len); } static void macsec_decrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct macsec_rx_sc *rx_sc = rx_sa->sc; int len; u32 pn; aead_request_free(macsec_skb_cb(skb)->req); if (!err) macsec_skb_cb(skb)->valid = true; rcu_read_lock_bh(); pn = ntohl(macsec_ethhdr(skb)->packet_number); if (!macsec_post_decrypt(skb, &macsec->secy, pn)) { rcu_read_unlock_bh(); kfree_skb(skb); goto out; } macsec_finalize_skb(skb, macsec->secy.icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, macsec->secy.netdev); if (gro_cells_receive(&macsec->gro_cells, skb) == NET_RX_SUCCESS) count_rx(dev, len); rcu_read_unlock_bh(); out: macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); dev_put(dev); } static struct sk_buff *macsec_decrypt(struct sk_buff *skb, struct net_device *dev, struct macsec_rx_sa *rx_sa, sci_t sci, struct macsec_secy *secy) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; u32 hdr_pn; u16 icv_len = secy->icv_len; macsec_skb_cb(skb)->valid = false; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); } hdr = (struct macsec_eth_header *)skb->data; hdr_pn = ntohl(hdr->packet_number); if (secy->xpn) { pn_t recovered_pn = rx_sa->next_pn_halves; recovered_pn.lower = hdr_pn; if (hdr_pn < rx_sa->next_pn_halves.lower && !pn_same_half(hdr_pn, rx_sa->next_pn_halves.lower)) recovered_pn.upper++; macsec_fill_iv_xpn(iv, rx_sa->ssci, recovered_pn.full64, rx_sa->key.salt); } else { macsec_fill_iv(iv, sci, hdr_pn); } sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); kfree_skb(skb); return ERR_PTR(ret); } if (hdr->tci_an & MACSEC_TCI_E) { /* confidentiality: ethernet + macsec header * authenticated, encrypted payload */ int len = skb->len - macsec_hdr_len(macsec_skb_cb(skb)->has_sci); aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(macsec_skb_cb(skb)->has_sci)); skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { aead_request_free(req); return ERR_PTR(-ENOMEM); } } else { /* integrity only: all headers + data authenticated */ aead_request_set_crypt(req, sg, sg, icv_len, iv); aead_request_set_ad(req, skb->len - icv_len); } macsec_skb_cb(skb)->req = req; skb->dev = dev; aead_request_set_callback(req, 0, macsec_decrypt_done, skb); dev_hold(dev); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { /* decryption/authentication failed * 10.6 if validateFrames is disabled, deliver anyway */ if (ret != -EBADMSG) { kfree_skb(skb); skb = ERR_PTR(ret); } } else { macsec_skb_cb(skb)->valid = true; } dev_put(dev); aead_request_free(req); return skb; } static struct macsec_rx_sc *find_rx_sc(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static struct macsec_rx_sc *find_rx_sc_rtnl(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc_rtnl(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) { /* Deliver to the uncontrolled port by default */ enum rx_handler_result ret = RX_HANDLER_PASS; struct ethhdr *hdr = eth_hdr(skb); struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); struct net_device *ndev = macsec->secy.netdev; /* If h/w offloading is enabled, HW decodes frames and strips * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { const struct macsec_ops *ops; ops = macsec_get_ops(macsec, NULL); if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; if (is_macsec_md_dst) { struct macsec_rx_sc *rx_sc; /* All drivers that implement MACsec offload * support using skb metadata destinations must * indicate that they do so. */ DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci); if (!rx_sc) continue; /* device indicated macsec offload occurred */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; eth_skb_pkt_type(skb, ndev); ret = RX_HANDLER_ANOTHER; goto out; } /* This datapath is insecure because it is unable to * enforce isolation of broadcast/multicast traffic and * unicast traffic with promiscuous mode on the macsec * netdev. Since the core stack has no mechanism to * check that the hardware did indeed receive MACsec * traffic, it is possible that the response handling * done by the MACsec port was to a plaintext packet. * This violates the MACsec protocol standard. */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } else if (is_multicast_ether_addr_64bits( hdr->h_dest)) { /* multicast frame, deliver on this port too */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); } else if (ndev->flags & IFF_PROMISC) { skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } continue; } /* 10.6 If the management control validateFrames is not * Strict, frames without a SecTAG are received, counted, and * delivered to the Controlled Port */ if (macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsNoTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); continue; } /* deliver on this port */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; if (__netif_rx(nskb) == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); } } out: rcu_read_unlock(); return ret; } static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct macsec_eth_header *hdr; struct macsec_secy *secy = NULL; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; struct pcpu_secy_stats *secy_stats; bool pulled_sci; int ret; if (skb_headroom(skb) < ETH_HLEN) goto drop_direct; hdr = macsec_ethhdr(skb); if (hdr->eth.h_proto != htons(ETH_P_MACSEC)) return handle_not_macsec(skb); skb = skb_unshare(skb, GFP_ATOMIC); *pskb = skb; if (!skb) return RX_HANDLER_CONSUMED; pulled_sci = pskb_may_pull(skb, macsec_extra_len(true)); if (!pulled_sci) { if (!pskb_may_pull(skb, macsec_extra_len(false))) goto drop_direct; } hdr = macsec_ethhdr(skb); /* Frames with a SecTAG that has the TCI E bit set but the C * bit clear are discarded, as this reserved encoding is used * to identify frames with a SecTAG that are not to be * delivered to the Controlled Port. */ if ((hdr->tci_an & (MACSEC_TCI_C | MACSEC_TCI_E)) == MACSEC_TCI_E) return RX_HANDLER_PASS; /* now, pull the extra length */ if (hdr->tci_an & MACSEC_TCI_SC) { if (!pulled_sci) goto drop_direct; } /* ethernet header is part of crypto processing */ skb_push(skb, ETH_HLEN); macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); if (!sci) goto drop_nosc; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); sc = sc ? macsec_rxsc_get(sc) : NULL; if (sc) { secy = &macsec->secy; rx_sc = sc; break; } } if (!secy) goto nosci; dev = secy->netdev; macsec = macsec_priv(dev); secy_stats = this_cpu_ptr(macsec->stats); rxsc_stats = this_cpu_ptr(rx_sc->stats); if (!macsec_validate_skb(skb, secy->icv_len, secy->xpn)) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsBadTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } rx_sa = macsec_rxsa_get(rx_sc->sa[macsec_skb_cb(skb)->assoc_num]); if (!rx_sa) { /* 10.6.1 if the SA is not in use */ /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotUsingSA++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } /* not Strict, the frame (with the SecTAG and ICV * removed) is delivered to the Controlled Port. */ u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsUnusedSA++; u64_stats_update_end(&rxsc_stats->syncp); goto deliver; } /* First, PN check to avoid decrypting obviously wrong packets */ hdr_pn = ntohl(hdr->packet_number); if (secy->replay_protect) { bool late; spin_lock(&rx_sa->lock); late = rx_sa->next_pn_halves.lower >= secy->replay_window && hdr_pn < (rx_sa->next_pn_halves.lower - secy->replay_window); if (secy->xpn) late = late && pn_same_half(rx_sa->next_pn_halves.lower, hdr_pn); spin_unlock(&rx_sa->lock); if (late) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); goto drop; } } macsec_skb_cb(skb)->rx_sa = rx_sa; /* Disabled && !changed text => skip validation */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames != MACSEC_VALIDATE_DISABLED) skb = macsec_decrypt(skb, dev, rx_sa, sci, secy); if (IS_ERR(skb)) { /* the decrypt callback needs the reference */ if (PTR_ERR(skb) != -EINPROGRESS) { macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); } rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; } if (!macsec_post_decrypt(skb, secy, hdr_pn)) goto drop; deliver: macsec_finalize_skb(skb, secy->icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, secy->netdev); if (rx_sa) macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); skb_orphan(skb); ret = gro_cells_receive(&macsec->gro_cells, skb); if (ret == NET_RX_SUCCESS) count_rx(dev, len); else DEV_STATS_INC(macsec->secy.netdev, rx_dropped); rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; drop: macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); *pskb = NULL; return RX_HANDLER_CONSUMED; nosci: /* 10.6.1 if the SC is not found */ cbit = !!(hdr->tci_an & MACSEC_TCI_C); if (!cbit) macsec_finalize_skb(skb, MACSEC_DEFAULT_ICV_LEN, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; secy_stats = this_cpu_ptr(macsec->stats); /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ if (cbit || macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsNoSCI++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_errors); continue; } /* not strict, the frame (with the SecTAG and ICV * removed) is delivered to the Controlled Port. */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; macsec_reset_skb(nskb, macsec->secy.netdev); ret = __netif_rx(nskb); if (ret == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUnknownSCI++; u64_stats_update_end(&secy_stats->syncp); } else { DEV_STATS_INC(macsec->secy.netdev, rx_dropped); } } rcu_read_unlock(); *pskb = skb; return RX_HANDLER_PASS; } static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len) { struct crypto_aead *tfm; int ret; tfm = crypto_alloc_aead("gcm(aes)", 0, 0); if (IS_ERR(tfm)) return tfm; ret = crypto_aead_setkey(tfm, key, key_len); if (ret < 0) goto fail; ret = crypto_aead_setauthsize(tfm, icv_len); if (ret < 0) goto fail; return tfm; fail: crypto_free_aead(tfm); return ERR_PTR(ret); } static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len, int icv_len) { rx_sa->stats = alloc_percpu(struct macsec_rx_sa_stats); if (!rx_sa->stats) return -ENOMEM; rx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len); if (IS_ERR(rx_sa->key.tfm)) { free_percpu(rx_sa->stats); return PTR_ERR(rx_sa->key.tfm); } rx_sa->ssci = MACSEC_UNDEF_SSCI; rx_sa->active = false; rx_sa->next_pn = 1; refcount_set(&rx_sa->refcnt, 1); spin_lock_init(&rx_sa->lock); return 0; } static void clear_rx_sa(struct macsec_rx_sa *rx_sa) { rx_sa->active = false; macsec_rxsa_put(rx_sa); } static void free_rx_sc(struct macsec_rx_sc *rx_sc) { int i; for (i = 0; i < MACSEC_NUM_AN; i++) { struct macsec_rx_sa *sa = rtnl_dereference(rx_sc->sa[i]); RCU_INIT_POINTER(rx_sc->sa[i], NULL); if (sa) clear_rx_sa(sa); } macsec_rxsc_put(rx_sc); } static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc, __rcu **rx_scp; for (rx_scp = &secy->rx_sc, rx_sc = rtnl_dereference(*rx_scp); rx_sc; rx_scp = &rx_sc->next, rx_sc = rtnl_dereference(*rx_scp)) { if (rx_sc->sci == sci) { if (rx_sc->active) secy->n_rx_sc--; rcu_assign_pointer(*rx_scp, rx_sc->next); return rx_sc; } } return NULL; } static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, bool active) { struct macsec_rx_sc *rx_sc; struct macsec_dev *macsec; struct net_device *real_dev = macsec_priv(dev)->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); struct macsec_secy *secy; list_for_each_entry(macsec, &rxd->secys, secys) { if (find_rx_sc_rtnl(&macsec->secy, sci)) return ERR_PTR(-EEXIST); } rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL); if (!rx_sc) return ERR_PTR(-ENOMEM); rx_sc->stats = netdev_alloc_pcpu_stats(struct pcpu_rx_sc_stats); if (!rx_sc->stats) { kfree(rx_sc); return ERR_PTR(-ENOMEM); } rx_sc->sci = sci; rx_sc->active = active; refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; rcu_assign_pointer(rx_sc->next, secy->rx_sc); rcu_assign_pointer(secy->rx_sc, rx_sc); if (rx_sc->active) secy->n_rx_sc++; return rx_sc; } static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len, int icv_len) { tx_sa->stats = alloc_percpu(struct macsec_tx_sa_stats); if (!tx_sa->stats) return -ENOMEM; tx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len); if (IS_ERR(tx_sa->key.tfm)) { free_percpu(tx_sa->stats); return PTR_ERR(tx_sa->key.tfm); } tx_sa->ssci = MACSEC_UNDEF_SSCI; tx_sa->active = false; refcount_set(&tx_sa->refcnt, 1); spin_lock_init(&tx_sa->lock); return 0; } static void clear_tx_sa(struct macsec_tx_sa *tx_sa) { tx_sa->active = false; macsec_txsa_put(tx_sa); } static struct genl_family macsec_fam; static struct net_device *get_dev_from_nl(struct net *net, struct nlattr **attrs) { int ifindex = nla_get_u32(attrs[MACSEC_ATTR_IFINDEX]); struct net_device *dev; dev = __dev_get_by_index(net, ifindex); if (!dev) return ERR_PTR(-ENODEV); if (!netif_is_macsec(dev)) return ERR_PTR(-ENODEV); return dev; } static enum macsec_offload nla_get_offload(const struct nlattr *nla) { return (__force enum macsec_offload)nla_get_u8(nla); } static sci_t nla_get_sci(const struct nlattr *nla) { return (__force sci_t)nla_get_u64(nla); } static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value, int padattr) { return nla_put_u64_64bit(skb, attrtype, (__force u64)value, padattr); } static ssci_t nla_get_ssci(const struct nlattr *nla) { return (__force ssci_t)nla_get_u32(nla); } static int nla_put_ssci(struct sk_buff *skb, int attrtype, ssci_t value) { return nla_put_u32(skb, attrtype, (__force u64)value); } static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_tx_sc **scp, u8 *assoc_num) { struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); dev = get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; tx_sa = rtnl_dereference(tx_sc->sa[*assoc_num]); if (!tx_sa) return ERR_PTR(-ENODEV); *devp = dev; *scp = tx_sc; *secyp = secy; return tx_sa; } static struct macsec_rx_sc *get_rxsc_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct net_device **devp, struct macsec_secy **secyp) { struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; dev = get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); secy = &macsec_priv(dev)->secy; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return ERR_PTR(-EINVAL); sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = find_rx_sc_rtnl(secy, sci); if (!rx_sc) return ERR_PTR(-ENODEV); *secyp = secy; *devp = dev; return rx_sc; } static struct macsec_rx_sa *get_rxsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_rx_sc **scp, u8 *assoc_num) { struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); rx_sc = get_rxsc_from_nl(net, attrs, tb_rxsc, devp, secyp); if (IS_ERR(rx_sc)) return ERR_CAST(rx_sc); rx_sa = rtnl_dereference(rx_sc->sa[*assoc_num]); if (!rx_sa) return ERR_PTR(-ENODEV); *scp = rx_sc; return rx_sa; } static const struct nla_policy macsec_genl_policy[NUM_MACSEC_ATTR] = { [MACSEC_ATTR_IFINDEX] = { .type = NLA_U32 }, [MACSEC_ATTR_RXSC_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_SA_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_OFFLOAD] = { .type = NLA_NESTED }, }; static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = { [MACSEC_RXSC_ATTR_SCI] = { .type = NLA_U64 }, [MACSEC_RXSC_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1), }; static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1), [MACSEC_SA_ATTR_ACTIVE] = NLA_POLICY_MAX(NLA_U8, 1), [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN(NLA_UINT, 1), [MACSEC_SA_ATTR_KEYID] = NLA_POLICY_EXACT_LEN(MACSEC_KEYID_LEN), [MACSEC_SA_ATTR_KEY] = NLA_POLICY_MAX_LEN(MACSEC_MAX_KEY_LEN), [MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 }, [MACSEC_SA_ATTR_SALT] = NLA_POLICY_EXACT_LEN(MACSEC_SALT_LEN), }; static const struct nla_policy macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = { [MACSEC_OFFLOAD_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX), }; /* Offloads an operation to a device driver */ static int macsec_offload(int (* const func)(struct macsec_context *), struct macsec_context *ctx) { int ret; if (unlikely(!func)) return 0; if (ctx->offload == MACSEC_OFFLOAD_PHY) mutex_lock(&ctx->phydev->lock); ret = (*func)(ctx); if (ctx->offload == MACSEC_OFFLOAD_PHY) mutex_unlock(&ctx->phydev->lock); return ret; } static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa) { if (!attrs[MACSEC_ATTR_SA_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb_sa, MACSEC_SA_ATTR_MAX, attrs[MACSEC_ATTR_SA_CONFIG], macsec_genl_sa_policy, NULL)) return -EINVAL; return 0; } static int parse_rxsc_config(struct nlattr **attrs, struct nlattr **tb_rxsc) { if (!attrs[MACSEC_ATTR_RXSC_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb_rxsc, MACSEC_RXSC_ATTR_MAX, attrs[MACSEC_ATTR_RXSC_CONFIG], macsec_genl_rxsc_policy, NULL)) return -EINVAL; return 0; } static bool validate_add_rxsa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || !attrs[MACSEC_SA_ATTR_KEY] || !attrs[MACSEC_SA_ATTR_KEYID]) return false; return true; } static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct nlattr **attrs = info->attrs; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; unsigned char assoc_num; int pn_len; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int err; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!validate_add_rxsa(tb_sa)) return -EINVAL; rtnl_lock(); rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) { pr_notice("macsec: nl: add_rxsa: bad key length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); rtnl_unlock(); return -EINVAL; } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (tb_sa[MACSEC_SA_ATTR_PN] && nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } if (secy->xpn) { if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { rtnl_unlock(); return -EINVAL; } } rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]); if (rx_sa) { rtnl_unlock(); return -EBUSY; } rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL); if (!rx_sa) { rtnl_unlock(); return -ENOMEM; } err = init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, secy->icv_len); if (err < 0) { kfree(rx_sa); rtnl_unlock(); return err; } if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); rx_sa->sc = rx_sc; if (secy->xpn) { rx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); nla_memcpy(rx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], MACSEC_SALT_LEN); } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa); rtnl_unlock(); return 0; cleanup: macsec_rxsa_put(rx_sa); rtnl_unlock(); return err; } static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; sci_t sci = MACSEC_UNDEF_SCI; struct nlattr **attrs = info->attrs; struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; bool active = true; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); rx_sc = create_rx_sc(dev, sci, active); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_add_rxsc, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: del_rx_sc(secy, sci); free_rx_sc(rx_sc); rtnl_unlock(); return ret; } static bool validate_add_txsa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || !attrs[MACSEC_SA_ATTR_PN] || !attrs[MACSEC_SA_ATTR_KEY] || !attrs[MACSEC_SA_ATTR_KEYID]) return false; return true; } static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; struct nlattr **attrs = info->attrs; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; unsigned char assoc_num; int pn_len; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational; int err; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_add_txsa(tb_sa)) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) { pr_notice("macsec: nl: add_txsa: bad key length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); rtnl_unlock(); return -EINVAL; } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_txsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } if (secy->xpn) { if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) { rtnl_unlock(); return -EINVAL; } } tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]); if (tx_sa) { rtnl_unlock(); return -EBUSY; } tx_sa = kmalloc(sizeof(*tx_sa), GFP_KERNEL); if (!tx_sa) { rtnl_unlock(); return -ENOMEM; } err = init_tx_sa(tx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len, secy->icv_len); if (err < 0) { kfree(tx_sa); rtnl_unlock(); return err; } spin_lock_bh(&tx_sa->lock); tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) tx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); was_operational = secy->operational; if (assoc_num == tx_sc->encoding_sa && tx_sa->active) secy->operational = true; if (secy->xpn) { tx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]); nla_memcpy(tx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT], MACSEC_SALT_LEN); } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN); rcu_assign_pointer(tx_sc->sa[assoc_num], tx_sa); rtnl_unlock(); return 0; cleanup: secy->operational = was_operational; macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } static int macsec_del_rxsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; u8 assoc_num; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; rtnl_lock(); rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa, &dev, &secy, &rx_sc, &assoc_num); if (IS_ERR(rx_sa)) { rtnl_unlock(); return PTR_ERR(rx_sa); } if (rx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(rx_sc->sa[assoc_num], NULL); clear_rx_sa(rx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_rxsc(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), info->attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = del_rx_sc(secy, sci); if (!rx_sc) { rtnl_unlock(); return -ENODEV; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsc, &ctx); if (ret) goto cleanup; } free_rx_sc(rx_sc); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_txsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(tx_sc->sa[assoc_num], NULL); clear_tx_sa(tx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static bool validate_upd_sa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || attrs[MACSEC_SA_ATTR_KEY] || attrs[MACSEC_SA_ATTR_KEYID] || attrs[MACSEC_SA_ATTR_SSCI] || attrs[MACSEC_SA_ATTR_SALT]) return false; return true; } static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational, was_active; pn_t prev_pn; int ret = 0; prev_pn.full64 = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_upd_sa(tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tb_sa[MACSEC_SA_ATTR_PN]) { int pn_len; pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: upd_txsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } spin_lock_bh(&tx_sa->lock); prev_pn = tx_sa->next_pn_halves; tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } was_active = tx_sa->active; if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) tx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); was_operational = secy->operational; if (assoc_num == tx_sc->encoding_sa) secy->operational = tx_sa->active; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_txsa, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&tx_sa->lock); tx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&tx_sa->lock); } tx_sa->active = was_active; secy->operational = was_operational; rtnl_unlock(); return ret; } static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; u8 assoc_num; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_active; pn_t prev_pn; int ret = 0; prev_pn.full64 = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_upd_sa(tb_sa)) return -EINVAL; rtnl_lock(); rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa, &dev, &secy, &rx_sc, &assoc_num); if (IS_ERR(rx_sa)) { rtnl_unlock(); return PTR_ERR(rx_sa); } if (tb_sa[MACSEC_SA_ATTR_PN]) { int pn_len; pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: upd_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } spin_lock_bh(&rx_sa->lock); prev_pn = rx_sa->next_pn_halves; rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } was_active = rx_sa->active; if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) rx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsa, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); rx_sa->next_pn_halves = prev_pn; spin_unlock_bh(&rx_sa->lock); } rx_sa->active = was_active; rtnl_unlock(); return ret; } static int macsec_upd_rxsc(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; unsigned int prev_n_rx_sc; bool was_active; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } was_active = rx_sc->active; prev_n_rx_sc = secy->n_rx_sc; if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) { bool new = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); if (rx_sc->active != new) secy->n_rx_sc += new ? 1 : -1; rx_sc->active = new; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsc, &ctx); if (ret) goto cleanup; } rtnl_unlock(); return 0; cleanup: secy->n_rx_sc = prev_n_rx_sc; rx_sc->active = was_active; rtnl_unlock(); return ret; } static bool macsec_is_configured(struct macsec_dev *macsec) { struct macsec_secy *secy = &macsec->secy; struct macsec_tx_sc *tx_sc = &secy->tx_sc; int i; if (secy->rx_sc) return true; for (i = 0; i < MACSEC_NUM_AN; i++) if (tx_sc->sa[i]) return true; return false; } static bool macsec_needs_tx_tag(struct macsec_dev *macsec, const struct macsec_ops *ops) { return macsec->offload == MACSEC_OFFLOAD_PHY && ops->mdo_insert_tx_tag; } static void macsec_set_head_tail_room(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; int needed_headroom, needed_tailroom; const struct macsec_ops *ops; ops = macsec_get_ops(macsec, NULL); if (ops) { needed_headroom = ops->needed_headroom; needed_tailroom = ops->needed_tailroom; } else { needed_headroom = MACSEC_NEEDED_HEADROOM; needed_tailroom = MACSEC_NEEDED_TAILROOM; } dev->needed_headroom = real_dev->needed_headroom + needed_headroom; dev->needed_tailroom = real_dev->needed_tailroom + needed_tailroom; } static void macsec_inherit_tso_max(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); /* if macsec is offloaded, we need to follow the lower * device's capabilities. otherwise, we can ignore them. */ if (macsec_is_offloaded(macsec)) netif_inherit_tso_max(dev, macsec->real_dev); } static int macsec_update_offload(struct net_device *dev, enum macsec_offload offload) { enum macsec_offload prev_offload; const struct macsec_ops *ops; struct macsec_context ctx; struct macsec_dev *macsec; int ret = 0; macsec = macsec_priv(dev); /* Check if the offloading mode is supported by the underlying layers */ if (offload != MACSEC_OFFLOAD_OFF && !macsec_check_offload(offload, macsec)) return -EOPNOTSUPP; /* Check if the net device is busy. */ if (netif_running(dev)) return -EBUSY; /* Check if the device already has rules configured: we do not support * rules migration. */ if (macsec_is_configured(macsec)) return -EBUSY; prev_offload = macsec->offload; ops = __macsec_get_ops(offload == MACSEC_OFFLOAD_OFF ? prev_offload : offload, macsec, &ctx); if (!ops) return -EOPNOTSUPP; macsec->offload = offload; ctx.secy = &macsec->secy; ret = offload == MACSEC_OFFLOAD_OFF ? macsec_offload(ops->mdo_del_secy, &ctx) : macsec_offload(ops->mdo_add_secy, &ctx); if (ret) { macsec->offload = prev_offload; return ret; } macsec_set_head_tail_room(dev); macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops); macsec_inherit_tso_max(dev); netdev_update_features(dev); return ret; } static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb_offload[MACSEC_OFFLOAD_ATTR_MAX + 1]; struct nlattr **attrs = info->attrs; enum macsec_offload offload; struct macsec_dev *macsec; struct net_device *dev; int ret = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (!attrs[MACSEC_ATTR_OFFLOAD]) return -EINVAL; if (nla_parse_nested_deprecated(tb_offload, MACSEC_OFFLOAD_ATTR_MAX, attrs[MACSEC_ATTR_OFFLOAD], macsec_genl_offload_policy, NULL)) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), attrs); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto out; } macsec = macsec_priv(dev); if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) { ret = -EINVAL; goto out; } offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); if (macsec->offload != offload) ret = macsec_update_offload(dev, offload); out: rtnl_unlock(); return ret; } static void get_tx_sa_stats(struct net_device *dev, int an, struct macsec_tx_sa *tx_sa, struct macsec_tx_sa_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.sa.assoc_num = an; ctx.sa.tx_sa = tx_sa; ctx.stats.tx_sa_stats = sum; ctx.secy = &macsec_priv(dev)->secy; macsec_offload(ops->mdo_get_tx_sa_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct macsec_tx_sa_stats *stats = per_cpu_ptr(tx_sa->stats, cpu); sum->OutPktsProtected += stats->OutPktsProtected; sum->OutPktsEncrypted += stats->OutPktsEncrypted; } } static int copy_tx_sa_stats(struct sk_buff *skb, struct macsec_tx_sa_stats *sum) { if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED, sum->OutPktsProtected) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED, sum->OutPktsEncrypted)) return -EMSGSIZE; return 0; } static void get_rx_sa_stats(struct net_device *dev, struct macsec_rx_sc *rx_sc, int an, struct macsec_rx_sa *rx_sa, struct macsec_rx_sa_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.sa.assoc_num = an; ctx.sa.rx_sa = rx_sa; ctx.stats.rx_sa_stats = sum; ctx.secy = &macsec_priv(dev)->secy; ctx.rx_sc = rx_sc; macsec_offload(ops->mdo_get_rx_sa_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct macsec_rx_sa_stats *stats = per_cpu_ptr(rx_sa->stats, cpu); sum->InPktsOK += stats->InPktsOK; sum->InPktsInvalid += stats->InPktsInvalid; sum->InPktsNotValid += stats->InPktsNotValid; sum->InPktsNotUsingSA += stats->InPktsNotUsingSA; sum->InPktsUnusedSA += stats->InPktsUnusedSA; } } static int copy_rx_sa_stats(struct sk_buff *skb, struct macsec_rx_sa_stats *sum) { if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_OK, sum->InPktsOK) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID, sum->InPktsInvalid) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID, sum->InPktsNotValid) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA, sum->InPktsNotUsingSA) || nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA, sum->InPktsUnusedSA)) return -EMSGSIZE; return 0; } static void get_rx_sc_stats(struct net_device *dev, struct macsec_rx_sc *rx_sc, struct macsec_rx_sc_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.stats.rx_sc_stats = sum; ctx.secy = &macsec_priv(dev)->secy; ctx.rx_sc = rx_sc; macsec_offload(ops->mdo_get_rx_sc_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct pcpu_rx_sc_stats *stats; struct macsec_rx_sc_stats tmp; unsigned int start; stats = per_cpu_ptr(rx_sc->stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->InOctetsValidated += tmp.InOctetsValidated; sum->InOctetsDecrypted += tmp.InOctetsDecrypted; sum->InPktsUnchecked += tmp.InPktsUnchecked; sum->InPktsDelayed += tmp.InPktsDelayed; sum->InPktsOK += tmp.InPktsOK; sum->InPktsInvalid += tmp.InPktsInvalid; sum->InPktsLate += tmp.InPktsLate; sum->InPktsNotValid += tmp.InPktsNotValid; sum->InPktsNotUsingSA += tmp.InPktsNotUsingSA; sum->InPktsUnusedSA += tmp.InPktsUnusedSA; } } static int copy_rx_sc_stats(struct sk_buff *skb, struct macsec_rx_sc_stats *sum) { if (nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, sum->InOctetsValidated, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, sum->InOctetsDecrypted, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, sum->InPktsUnchecked, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, sum->InPktsDelayed, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, sum->InPktsOK, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, sum->InPktsInvalid, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, sum->InPktsLate, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, sum->InPktsNotValid, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, sum->InPktsNotUsingSA, MACSEC_RXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, sum->InPktsUnusedSA, MACSEC_RXSC_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; } static void get_tx_sc_stats(struct net_device *dev, struct macsec_tx_sc_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.stats.tx_sc_stats = sum; ctx.secy = &macsec_priv(dev)->secy; macsec_offload(ops->mdo_get_tx_sc_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct pcpu_tx_sc_stats *stats; struct macsec_tx_sc_stats tmp; unsigned int start; stats = per_cpu_ptr(macsec_priv(dev)->secy.tx_sc.stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->OutPktsProtected += tmp.OutPktsProtected; sum->OutPktsEncrypted += tmp.OutPktsEncrypted; sum->OutOctetsProtected += tmp.OutOctetsProtected; sum->OutOctetsEncrypted += tmp.OutOctetsEncrypted; } } static int copy_tx_sc_stats(struct sk_buff *skb, struct macsec_tx_sc_stats *sum) { if (nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, sum->OutPktsProtected, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, sum->OutPktsEncrypted, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, sum->OutOctetsProtected, MACSEC_TXSC_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, sum->OutOctetsEncrypted, MACSEC_TXSC_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; } static void get_secy_stats(struct net_device *dev, struct macsec_dev_stats *sum) { struct macsec_dev *macsec = macsec_priv(dev); int cpu; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.stats.dev_stats = sum; ctx.secy = &macsec_priv(dev)->secy; macsec_offload(ops->mdo_get_dev_stats, &ctx); } return; } for_each_possible_cpu(cpu) { const struct pcpu_secy_stats *stats; struct macsec_dev_stats tmp; unsigned int start; stats = per_cpu_ptr(macsec_priv(dev)->stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->OutPktsUntagged += tmp.OutPktsUntagged; sum->InPktsUntagged += tmp.InPktsUntagged; sum->OutPktsTooLong += tmp.OutPktsTooLong; sum->InPktsNoTag += tmp.InPktsNoTag; sum->InPktsBadTag += tmp.InPktsBadTag; sum->InPktsUnknownSCI += tmp.InPktsUnknownSCI; sum->InPktsNoSCI += tmp.InPktsNoSCI; sum->InPktsOverrun += tmp.InPktsOverrun; } } static int copy_secy_stats(struct sk_buff *skb, struct macsec_dev_stats *sum) { if (nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED, sum->OutPktsUntagged, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED, sum->InPktsUntagged, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG, sum->OutPktsTooLong, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG, sum->InPktsNoTag, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG, sum->InPktsBadTag, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, sum->InPktsUnknownSCI, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, sum->InPktsNoSCI, MACSEC_SECY_STATS_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, sum->InPktsOverrun, MACSEC_SECY_STATS_ATTR_PAD)) return -EMSGSIZE; return 0; } static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) { struct macsec_tx_sc *tx_sc = &secy->tx_sc; struct nlattr *secy_nest = nla_nest_start_noflag(skb, MACSEC_ATTR_SECY); u64 csid; if (!secy_nest) return 1; switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256; break; default: goto cancel; } if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci, MACSEC_SECY_ATTR_PAD) || nla_put_u64_64bit(skb, MACSEC_SECY_ATTR_CIPHER_SUITE, csid, MACSEC_SECY_ATTR_PAD) || nla_put_u8(skb, MACSEC_SECY_ATTR_ICV_LEN, secy->icv_len) || nla_put_u8(skb, MACSEC_SECY_ATTR_OPER, secy->operational) || nla_put_u8(skb, MACSEC_SECY_ATTR_PROTECT, secy->protect_frames) || nla_put_u8(skb, MACSEC_SECY_ATTR_REPLAY, secy->replay_protect) || nla_put_u8(skb, MACSEC_SECY_ATTR_VALIDATE, secy->validate_frames) || nla_put_u8(skb, MACSEC_SECY_ATTR_ENCRYPT, tx_sc->encrypt) || nla_put_u8(skb, MACSEC_SECY_ATTR_INC_SCI, tx_sc->send_sci) || nla_put_u8(skb, MACSEC_SECY_ATTR_ES, tx_sc->end_station) || nla_put_u8(skb, MACSEC_SECY_ATTR_SCB, tx_sc->scb) || nla_put_u8(skb, MACSEC_SECY_ATTR_ENCODING_SA, tx_sc->encoding_sa)) goto cancel; if (secy->replay_protect) { if (nla_put_u32(skb, MACSEC_SECY_ATTR_WINDOW, secy->replay_window)) goto cancel; } nla_nest_end(skb, secy_nest); return 0; cancel: nla_nest_cancel(skb, secy_nest); return 1; } static noinline_for_stack int dump_secy(struct macsec_secy *secy, struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct macsec_tx_sc_stats tx_sc_stats = {0, }; struct macsec_tx_sa_stats tx_sa_stats = {0, }; struct macsec_rx_sc_stats rx_sc_stats = {0, }; struct macsec_rx_sa_stats rx_sa_stats = {0, }; struct macsec_dev *macsec = netdev_priv(dev); struct macsec_dev_stats dev_stats = {0, }; struct macsec_tx_sc *tx_sc = &secy->tx_sc; struct nlattr *txsa_list, *rxsc_list; struct macsec_rx_sc *rx_sc; struct nlattr *attr; void *hdr; int i, j; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &macsec_fam, NLM_F_MULTI, MACSEC_CMD_GET_TXSC); if (!hdr) return -EMSGSIZE; genl_dump_check_consistent(cb, hdr); if (nla_put_u32(skb, MACSEC_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; attr = nla_nest_start_noflag(skb, MACSEC_ATTR_OFFLOAD); if (!attr) goto nla_put_failure; if (nla_put_u8(skb, MACSEC_OFFLOAD_ATTR_TYPE, macsec->offload)) goto nla_put_failure; nla_nest_end(skb, attr); if (nla_put_secy(secy, skb)) goto nla_put_failure; attr = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSC_STATS); if (!attr) goto nla_put_failure; get_tx_sc_stats(dev, &tx_sc_stats); if (copy_tx_sc_stats(skb, &tx_sc_stats)) { nla_nest_cancel(skb, attr); goto nla_put_failure; } nla_nest_end(skb, attr); attr = nla_nest_start_noflag(skb, MACSEC_ATTR_SECY_STATS); if (!attr) goto nla_put_failure; get_secy_stats(dev, &dev_stats); if (copy_secy_stats(skb, &dev_stats)) { nla_nest_cancel(skb, attr); goto nla_put_failure; } nla_nest_end(skb, attr); txsa_list = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSA_LIST); if (!txsa_list) goto nla_put_failure; for (i = 0, j = 1; i < MACSEC_NUM_AN; i++) { struct macsec_tx_sa *tx_sa = rtnl_dereference(tx_sc->sa[i]); struct nlattr *txsa_nest; u64 pn; int pn_len; if (!tx_sa) continue; txsa_nest = nla_nest_start_noflag(skb, j++); if (!txsa_nest) { nla_nest_cancel(skb, txsa_list); goto nla_put_failure; } attr = nla_nest_start_noflag(skb, MACSEC_SA_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); goto nla_put_failure; } memset(&tx_sa_stats, 0, sizeof(tx_sa_stats)); get_tx_sa_stats(dev, i, tx_sa, &tx_sa_stats); if (copy_tx_sa_stats(skb, &tx_sa_stats)) { nla_nest_cancel(skb, attr); nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); goto nla_put_failure; } nla_nest_end(skb, attr); if (secy->xpn) { pn = tx_sa->next_pn; pn_len = MACSEC_XPN_PN_LEN; } else { pn = tx_sa->next_pn_halves.lower; pn_len = MACSEC_DEFAULT_PN_LEN; } if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) || nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, tx_sa->key.id) || (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, tx_sa->ssci)) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); goto nla_put_failure; } nla_nest_end(skb, txsa_nest); } nla_nest_end(skb, txsa_list); rxsc_list = nla_nest_start_noflag(skb, MACSEC_ATTR_RXSC_LIST); if (!rxsc_list) goto nla_put_failure; j = 1; for_each_rxsc_rtnl(secy, rx_sc) { int k; struct nlattr *rxsa_list; struct nlattr *rxsc_nest = nla_nest_start_noflag(skb, j++); if (!rxsc_nest) { nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } if (nla_put_u8(skb, MACSEC_RXSC_ATTR_ACTIVE, rx_sc->active) || nla_put_sci(skb, MACSEC_RXSC_ATTR_SCI, rx_sc->sci, MACSEC_RXSC_ATTR_PAD)) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } attr = nla_nest_start_noflag(skb, MACSEC_RXSC_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } memset(&rx_sc_stats, 0, sizeof(rx_sc_stats)); get_rx_sc_stats(dev, rx_sc, &rx_sc_stats); if (copy_rx_sc_stats(skb, &rx_sc_stats)) { nla_nest_cancel(skb, attr); nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } nla_nest_end(skb, attr); rxsa_list = nla_nest_start_noflag(skb, MACSEC_RXSC_ATTR_SA_LIST); if (!rxsa_list) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } for (i = 0, k = 1; i < MACSEC_NUM_AN; i++) { struct macsec_rx_sa *rx_sa = rtnl_dereference(rx_sc->sa[i]); struct nlattr *rxsa_nest; u64 pn; int pn_len; if (!rx_sa) continue; rxsa_nest = nla_nest_start_noflag(skb, k++); if (!rxsa_nest) { nla_nest_cancel(skb, rxsa_list); nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } attr = nla_nest_start_noflag(skb, MACSEC_SA_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, rxsa_list); nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } memset(&rx_sa_stats, 0, sizeof(rx_sa_stats)); get_rx_sa_stats(dev, rx_sc, i, rx_sa, &rx_sa_stats); if (copy_rx_sa_stats(skb, &rx_sa_stats)) { nla_nest_cancel(skb, attr); nla_nest_cancel(skb, rxsa_list); nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } nla_nest_end(skb, attr); if (secy->xpn) { pn = rx_sa->next_pn; pn_len = MACSEC_XPN_PN_LEN; } else { pn = rx_sa->next_pn_halves.lower; pn_len = MACSEC_DEFAULT_PN_LEN; } if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) || nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) || nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, rx_sa->key.id) || (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, rx_sa->ssci)) || nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) { nla_nest_cancel(skb, rxsa_nest); nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); goto nla_put_failure; } nla_nest_end(skb, rxsa_nest); } nla_nest_end(skb, rxsa_list); nla_nest_end(skb, rxsc_nest); } nla_nest_end(skb, rxsc_list); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int macsec_generation = 1; /* protected by RTNL */ static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct net_device *dev; int dev_idx, d; dev_idx = cb->args[0]; d = 0; rtnl_lock(); cb->seq = macsec_generation; for_each_netdev(net, dev) { struct macsec_secy *secy; if (d < dev_idx) goto next; if (!netif_is_macsec(dev)) goto next; secy = &macsec_priv(dev)->secy; if (dump_secy(secy, dev, skb, cb) < 0) goto done; next: d++; } done: rtnl_unlock(); cb->args[0] = d; return skb->len; } static const struct genl_small_ops macsec_genl_ops[] = { { .cmd = MACSEC_CMD_GET_TXSC, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = macsec_dump_txsc, }, { .cmd = MACSEC_CMD_ADD_RXSC, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSC, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSC, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_TXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_TXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_TXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_RXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_rxsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_rxsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_rxsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_OFFLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_offload, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family macsec_fam __ro_after_init = { .name = MACSEC_GENL_NAME, .hdrsize = 0, .version = MACSEC_GENL_VERSION, .maxattr = MACSEC_ATTR_MAX, .policy = macsec_genl_policy, .netnsok = true, .module = THIS_MODULE, .small_ops = macsec_genl_ops, .n_small_ops = ARRAY_SIZE(macsec_genl_ops), .resv_start_op = MACSEC_CMD_UPD_OFFLOAD + 1, }; static struct sk_buff *macsec_insert_tx_tag(struct sk_buff *skb, struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); const struct macsec_ops *ops; struct phy_device *phydev; struct macsec_context ctx; int skb_final_len; int err; ops = macsec_get_ops(macsec, &ctx); skb_final_len = skb->len - ETH_HLEN + ops->needed_headroom + ops->needed_tailroom; if (unlikely(skb_final_len > macsec->real_dev->mtu)) { err = -EINVAL; goto cleanup; } phydev = macsec->real_dev->phydev; err = skb_ensure_writable_head_tail(skb, dev); if (unlikely(err < 0)) goto cleanup; err = ops->mdo_insert_tx_tag(phydev, skb); if (unlikely(err)) goto cleanup; return skb; cleanup: kfree_skb(skb); return ERR_PTR(err); } static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct macsec_dev *macsec = netdev_priv(dev); struct macsec_secy *secy = &macsec->secy; struct pcpu_secy_stats *secy_stats; int ret, len; if (macsec_is_offloaded(netdev_priv(dev))) { struct metadata_dst *md_dst = secy->tx_sc.md_dst; skb_dst_drop(skb); dst_hold(&md_dst->dst); skb_dst_set(skb, &md_dst->dst); if (macsec->insert_tx_tag) { skb = macsec_insert_tx_tag(skb, dev); if (IS_ERR(skb)) { DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } } skb->dev = macsec->real_dev; return dev_queue_xmit(skb); } /* 10.5 */ if (!secy->protect_frames) { secy_stats = this_cpu_ptr(macsec->stats); u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); skb->dev = macsec->real_dev; len = skb->len; ret = dev_queue_xmit(skb); count_tx(dev, ret, len); return ret; } if (!secy->operational) { kfree_skb(skb); DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } len = skb->len; skb = macsec_encrypt(skb, dev); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EINPROGRESS) DEV_STATS_INC(dev, tx_dropped); return NETDEV_TX_OK; } macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa); macsec_encrypt_finish(skb, dev); ret = dev_queue_xmit(skb); count_tx(dev, ret, len); return ret; } #define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) #define MACSEC_OFFLOAD_FEATURES \ (MACSEC_FEATURES | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES | \ NETIF_F_LRO | NETIF_F_RXHASH | NETIF_F_CSUM_MASK | NETIF_F_RXCSUM) static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; int err; err = gro_cells_init(&macsec->gro_cells, dev); if (err) return err; macsec_inherit_tso_max(dev); dev->hw_features = real_dev->hw_features & MACSEC_OFFLOAD_FEATURES; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->features = real_dev->features & MACSEC_OFFLOAD_FEATURES; dev->features |= NETIF_F_GSO_SOFTWARE; dev->lltx = true; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; macsec_set_head_tail_room(dev); if (is_zero_ether_addr(dev->dev_addr)) eth_hw_addr_inherit(dev, real_dev); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); /* Get macsec's reference to real_dev */ netdev_hold(real_dev, &macsec->dev_tracker, GFP_KERNEL); return 0; } static void macsec_dev_uninit(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); gro_cells_destroy(&macsec->gro_cells); } static netdev_features_t macsec_fix_features(struct net_device *dev, netdev_features_t features) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; netdev_features_t mask; mask = macsec_is_offloaded(macsec) ? MACSEC_OFFLOAD_FEATURES : MACSEC_FEATURES; features &= (real_dev->features & mask) | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; return features; } static int macsec_dev_open(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; int err; err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) return err; if (dev->flags & IFF_ALLMULTI) { err = dev_set_allmulti(real_dev, 1); if (err < 0) goto del_unicast; } if (dev->flags & IFF_PROMISC) { err = dev_set_promiscuity(real_dev, 1); if (err < 0) goto clear_allmulti; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto clear_allmulti; } ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_dev_open, &ctx); if (err) goto clear_allmulti; } if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; clear_allmulti: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); del_unicast: dev_uc_del(real_dev, dev->dev_addr); netif_carrier_off(dev); return err; } static int macsec_dev_stop(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; netif_carrier_off(dev); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.secy = &macsec->secy; macsec_offload(ops->mdo_dev_stop, &ctx); } } dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(real_dev, -1); dev_uc_del(real_dev, dev->dev_addr); return 0; } static void macsec_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = macsec_priv(dev)->real_dev; if (!(dev->flags & IFF_UP)) return; if (change & IFF_ALLMULTI) dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); if (change & IFF_PROMISC) dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); } static void macsec_dev_set_rx_mode(struct net_device *dev) { struct net_device *real_dev = macsec_priv(dev)->real_dev; dev_mc_sync(real_dev, dev); dev_uc_sync(real_dev, dev); } static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct sockaddr *addr = p; u8 old_addr[ETH_ALEN]; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; if (dev->flags & IFF_UP) { err = dev_uc_add(real_dev, addr->sa_data); if (err < 0) return err; } ether_addr_copy(old_addr, dev->dev_addr); eth_hw_addr_set(dev, addr->sa_data); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (!ops) { err = -EOPNOTSUPP; goto restore_old_addr; } ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_upd_secy, &ctx); if (err) goto restore_old_addr; } if (dev->flags & IFF_UP) dev_uc_del(real_dev, old_addr); return 0; restore_old_addr: if (dev->flags & IFF_UP) dev_uc_del(real_dev, addr->sa_data); eth_hw_addr_set(dev, old_addr); return err; } static int macsec_change_mtu(struct net_device *dev, int new_mtu) { struct macsec_dev *macsec = macsec_priv(dev); unsigned int extra = macsec->secy.icv_len + macsec_extra_len(true); if (macsec->real_dev->mtu - extra < new_mtu) return -ERANGE; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static void macsec_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s) { if (!dev->tstats) return; dev_fetch_sw_netstats(s, dev->tstats); s->rx_dropped = DEV_STATS_READ(dev, rx_dropped); s->tx_dropped = DEV_STATS_READ(dev, tx_dropped); s->rx_errors = DEV_STATS_READ(dev, rx_errors); } static int macsec_get_iflink(const struct net_device *dev) { return READ_ONCE(macsec_priv(dev)->real_dev->ifindex); } static const struct net_device_ops macsec_netdev_ops = { .ndo_init = macsec_dev_init, .ndo_uninit = macsec_dev_uninit, .ndo_open = macsec_dev_open, .ndo_stop = macsec_dev_stop, .ndo_fix_features = macsec_fix_features, .ndo_change_mtu = macsec_change_mtu, .ndo_set_rx_mode = macsec_dev_set_rx_mode, .ndo_change_rx_flags = macsec_dev_change_rx_flags, .ndo_set_mac_address = macsec_set_mac_address, .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, }; static const struct device_type macsec_type = { .name = "macsec", }; static int validate_cipher_suite(const struct nlattr *attr, struct netlink_ext_ack *extack); static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = NLA_POLICY_RANGE(NLA_U8, MACSEC_MIN_ICV_LEN, MACSEC_STD_ICV_LEN), [IFLA_MACSEC_CIPHER_SUITE] = NLA_POLICY_VALIDATE_FN(NLA_U64, validate_cipher_suite), [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, [IFLA_MACSEC_ENCODING_SA] = NLA_POLICY_MAX(NLA_U8, MACSEC_NUM_AN - 1), [IFLA_MACSEC_ENCRYPT] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_INC_SCI] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_ES] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_SCB] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_REPLAY_PROTECT] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_MACSEC_VALIDATION] = NLA_POLICY_MAX(NLA_U8, MACSEC_VALIDATE_MAX), [IFLA_MACSEC_OFFLOAD] = NLA_POLICY_MAX(NLA_U8, MACSEC_OFFLOAD_MAX), }; static void macsec_free_netdev(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); dst_release(&macsec->secy.tx_sc.md_dst->dst); free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); /* Get rid of the macsec's reference to real_dev */ netdev_put(macsec->real_dev, &macsec->dev_tracker); } static void macsec_setup(struct net_device *dev) { ether_setup(dev); dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE | IFF_UNICAST_FLT; dev->netdev_ops = &macsec_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = macsec_free_netdev; SET_NETDEV_DEVTYPE(dev, &macsec_type); eth_zero_addr(dev->broadcast); } static int macsec_changelink_common(struct net_device *dev, struct nlattr *data[]) { struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; if (data[IFLA_MACSEC_ENCODING_SA]) { struct macsec_tx_sa *tx_sa; tx_sc->encoding_sa = nla_get_u8(data[IFLA_MACSEC_ENCODING_SA]); tx_sa = rtnl_dereference(tx_sc->sa[tx_sc->encoding_sa]); secy->operational = tx_sa && tx_sa->active; } if (data[IFLA_MACSEC_ENCRYPT]) tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); if (data[IFLA_MACSEC_PROTECT]) secy->protect_frames = !!nla_get_u8(data[IFLA_MACSEC_PROTECT]); if (data[IFLA_MACSEC_INC_SCI]) tx_sc->send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); if (data[IFLA_MACSEC_ES]) tx_sc->end_station = !!nla_get_u8(data[IFLA_MACSEC_ES]); if (data[IFLA_MACSEC_SCB]) tx_sc->scb = !!nla_get_u8(data[IFLA_MACSEC_SCB]); if (data[IFLA_MACSEC_REPLAY_PROTECT]) secy->replay_protect = !!nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT]); if (data[IFLA_MACSEC_VALIDATION]) secy->validate_frames = nla_get_u8(data[IFLA_MACSEC_VALIDATION]); if (data[IFLA_MACSEC_CIPHER_SUITE]) { switch (nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE])) { case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_DEFAULT_CIPHER_ID: secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; secy->xpn = false; break; case MACSEC_CIPHER_ID_GCM_AES_256: secy->key_len = MACSEC_GCM_AES_256_SAK_LEN; secy->xpn = false; break; case MACSEC_CIPHER_ID_GCM_AES_XPN_128: secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; secy->xpn = true; break; case MACSEC_CIPHER_ID_GCM_AES_XPN_256: secy->key_len = MACSEC_GCM_AES_256_SAK_LEN; secy->xpn = true; break; default: return -EINVAL; } } if (data[IFLA_MACSEC_WINDOW]) { secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window * for XPN cipher suites */ if (secy->xpn && secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) return -EINVAL; } return 0; } static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); bool macsec_offload_state_change = false; enum macsec_offload offload; struct macsec_tx_sc tx_sc; struct macsec_secy secy; int ret; if (!data) return 0; if (data[IFLA_MACSEC_CIPHER_SUITE] || data[IFLA_MACSEC_ICV_LEN] || data[IFLA_MACSEC_SCI] || data[IFLA_MACSEC_PORT]) return -EINVAL; /* Keep a copy of unmodified secy and tx_sc, in case the offload * propagation fails, to revert macsec_changelink_common. */ memcpy(&secy, &macsec->secy, sizeof(secy)); memcpy(&tx_sc, &macsec->secy.tx_sc, sizeof(tx_sc)); ret = macsec_changelink_common(dev, data); if (ret) goto cleanup; if (data[IFLA_MACSEC_OFFLOAD]) { offload = nla_get_u8(data[IFLA_MACSEC_OFFLOAD]); if (macsec->offload != offload) { macsec_offload_state_change = true; ret = macsec_update_offload(dev, offload); if (ret) goto cleanup; } } /* If h/w offloading is available, propagate to the device */ if (!macsec_offload_state_change && macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.secy = &macsec->secy; ret = macsec_offload(ops->mdo_upd_secy, &ctx); if (ret) goto cleanup; } return 0; cleanup: memcpy(&macsec->secy.tx_sc, &tx_sc, sizeof(tx_sc)); memcpy(&macsec->secy, &secy, sizeof(secy)); return ret; } static void macsec_del_dev(struct macsec_dev *macsec) { int i; while (macsec->secy.rx_sc) { struct macsec_rx_sc *rx_sc = rtnl_dereference(macsec->secy.rx_sc); rcu_assign_pointer(macsec->secy.rx_sc, rx_sc->next); free_rx_sc(rx_sc); } for (i = 0; i < MACSEC_NUM_AN; i++) { struct macsec_tx_sa *sa = rtnl_dereference(macsec->secy.tx_sc.sa[i]); if (sa) { RCU_INIT_POINTER(macsec->secy.tx_sc.sa[i], NULL); clear_tx_sa(sa); } } } static void macsec_common_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (ops) { ctx.secy = &macsec->secy; macsec_offload(ops->mdo_del_secy, &ctx); } } unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); netdev_upper_dev_unlink(real_dev, dev); macsec_generation++; } static void macsec_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); macsec_common_dellink(dev, head); if (list_empty(&rxd->secys)) { netdev_rx_handler_unregister(real_dev); kfree(rxd); } } static int register_macsec_dev(struct net_device *real_dev, struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); if (!rxd) { int err; rxd = kmalloc(sizeof(*rxd), GFP_KERNEL); if (!rxd) return -ENOMEM; INIT_LIST_HEAD(&rxd->secys); err = netdev_rx_handler_register(real_dev, macsec_handle_frame, rxd); if (err < 0) { kfree(rxd); return err; } } list_add_tail_rcu(&macsec->secys, &rxd->secys); return 0; } static bool sci_exists(struct net_device *dev, sci_t sci) { struct macsec_rxh_data *rxd = macsec_data_rtnl(dev); struct macsec_dev *macsec; list_for_each_entry(macsec, &rxd->secys, secys) { if (macsec->secy.sci == sci) return true; } return false; } static sci_t dev_to_sci(struct net_device *dev, __be16 port) { return make_sci(dev->dev_addr, port); } static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); struct macsec_secy *secy = &macsec->secy; macsec->stats = netdev_alloc_pcpu_stats(struct pcpu_secy_stats); if (!macsec->stats) return -ENOMEM; secy->tx_sc.stats = netdev_alloc_pcpu_stats(struct pcpu_tx_sc_stats); if (!secy->tx_sc.stats) return -ENOMEM; secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!secy->tx_sc.md_dst) /* macsec and secy percpu stats will be freed when unregistering * net_device in macsec_free_netdev() */ return -ENOMEM; if (sci == MACSEC_UNDEF_SCI) sci = dev_to_sci(dev, MACSEC_PORT_ES); secy->netdev = dev; secy->operational = true; secy->key_len = DEFAULT_SAK_LEN; secy->icv_len = icv_len; secy->validate_frames = MACSEC_VALIDATE_DEFAULT; secy->protect_frames = true; secy->replay_protect = false; secy->xpn = DEFAULT_XPN; secy->sci = sci; secy->tx_sc.md_dst->u.macsec_info.sci = sci; secy->tx_sc.active = true; secy->tx_sc.encoding_sa = DEFAULT_ENCODING_SA; secy->tx_sc.encrypt = DEFAULT_ENCRYPT; secy->tx_sc.send_sci = DEFAULT_SEND_SCI; secy->tx_sc.end_station = false; secy->tx_sc.scb = false; return 0; } static struct lock_class_key macsec_netdev_addr_lock_key; static int macsec_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct macsec_dev *macsec = macsec_priv(dev); struct nlattr **data = params->data; struct nlattr **tb = params->tb; rx_handler_func_t *rx_handler; u8 icv_len = MACSEC_DEFAULT_ICV_LEN; struct net_device *real_dev; int err, mtu; sci_t sci; if (!tb[IFLA_LINK]) return -EINVAL; real_dev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; if (real_dev->type != ARPHRD_ETHER) return -EINVAL; dev->priv_flags |= IFF_MACSEC; macsec->real_dev = real_dev; if (data && data[IFLA_MACSEC_OFFLOAD]) macsec->offload = nla_get_offload(data[IFLA_MACSEC_OFFLOAD]); else /* MACsec offloading is off by default */ macsec->offload = MACSEC_OFFLOAD_OFF; /* Check if the offloading mode is supported by the underlying layers */ if (macsec->offload != MACSEC_OFFLOAD_OFF && !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; /* send_sci must be set to true when transmit sci explicitly is set */ if ((data && data[IFLA_MACSEC_SCI]) && (data && data[IFLA_MACSEC_INC_SCI])) { u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); if (!send_sci) return -EINVAL; } if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); if (mtu < 0) dev->mtu = 0; else dev->mtu = mtu; rx_handler = rtnl_dereference(real_dev->rx_handler); if (rx_handler && rx_handler != macsec_handle_frame) return -EBUSY; err = register_netdevice(dev); if (err < 0) return err; netdev_lockdep_set_classes(dev); lockdep_set_class(&dev->addr_list_lock, &macsec_netdev_addr_lock_key); err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; /* need to be already registered so that ->init has run and * the MAC addr is set */ if (data && data[IFLA_MACSEC_SCI]) sci = nla_get_sci(data[IFLA_MACSEC_SCI]); else if (data && data[IFLA_MACSEC_PORT]) sci = dev_to_sci(dev, nla_get_be16(data[IFLA_MACSEC_PORT])); else sci = dev_to_sci(dev, MACSEC_PORT_ES); if (rx_handler && sci_exists(real_dev, sci)) { err = -EBUSY; goto unlink; } err = macsec_add_dev(dev, sci, icv_len); if (err) goto unlink; if (data) { err = macsec_changelink_common(dev, data); if (err) goto del_dev; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_add_secy, &ctx); if (err) goto del_dev; macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops); } } err = register_macsec_dev(real_dev, dev); if (err < 0) goto del_dev; netdev_update_features(dev); netif_stacked_transfer_operstate(real_dev, dev); linkwatch_fire_event(dev); macsec_generation++; return 0; del_dev: macsec_del_dev(macsec); unlink: netdev_upper_dev_unlink(real_dev, dev); unregister: unregister_netdevice(dev); return err; } static int validate_cipher_suite(const struct nlattr *attr, struct netlink_ext_ack *extack) { switch (nla_get_u64(attr)) { case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_CIPHER_ID_GCM_AES_256: case MACSEC_CIPHER_ID_GCM_AES_XPN_128: case MACSEC_CIPHER_ID_GCM_AES_XPN_256: case MACSEC_DEFAULT_CIPHER_ID: return 0; default: return -EINVAL; } } static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u8 icv_len = MACSEC_DEFAULT_ICV_LEN; bool es, scb, sci; if (!data) return 0; if (data[IFLA_MACSEC_ICV_LEN]) { icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); if (icv_len != MACSEC_DEFAULT_ICV_LEN) { char dummy_key[DEFAULT_SAK_LEN] = { 0 }; struct crypto_aead *dummy_tfm; dummy_tfm = macsec_alloc_tfm(dummy_key, DEFAULT_SAK_LEN, icv_len); if (IS_ERR(dummy_tfm)) return PTR_ERR(dummy_tfm); crypto_free_aead(dummy_tfm); } } es = nla_get_u8_default(data[IFLA_MACSEC_ES], false); sci = nla_get_u8_default(data[IFLA_MACSEC_INC_SCI], false); scb = nla_get_u8_default(data[IFLA_MACSEC_SCB], false); if ((sci && (scb || es)) || (scb && es)) return -EINVAL; if ((data[IFLA_MACSEC_REPLAY_PROTECT] && nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) && !data[IFLA_MACSEC_WINDOW]) return -EINVAL; return 0; } static struct net *macsec_get_link_net(const struct net_device *dev) { return dev_net(macsec_priv(dev)->real_dev); } struct net_device *macsec_get_real_dev(const struct net_device *dev) { return macsec_priv(dev)->real_dev; } EXPORT_SYMBOL_GPL(macsec_get_real_dev); bool macsec_netdev_is_offloaded(struct net_device *dev) { return macsec_is_offloaded(macsec_priv(dev)); } EXPORT_SYMBOL_GPL(macsec_netdev_is_offloaded); static size_t macsec_get_size(const struct net_device *dev) { return nla_total_size_64bit(8) + /* IFLA_MACSEC_SCI */ nla_total_size(1) + /* IFLA_MACSEC_ICV_LEN */ nla_total_size_64bit(8) + /* IFLA_MACSEC_CIPHER_SUITE */ nla_total_size(4) + /* IFLA_MACSEC_WINDOW */ nla_total_size(1) + /* IFLA_MACSEC_ENCODING_SA */ nla_total_size(1) + /* IFLA_MACSEC_ENCRYPT */ nla_total_size(1) + /* IFLA_MACSEC_PROTECT */ nla_total_size(1) + /* IFLA_MACSEC_INC_SCI */ nla_total_size(1) + /* IFLA_MACSEC_ES */ nla_total_size(1) + /* IFLA_MACSEC_SCB */ nla_total_size(1) + /* IFLA_MACSEC_REPLAY_PROTECT */ nla_total_size(1) + /* IFLA_MACSEC_VALIDATION */ nla_total_size(1) + /* IFLA_MACSEC_OFFLOAD */ 0; } static int macsec_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct macsec_tx_sc *tx_sc; struct macsec_dev *macsec; struct macsec_secy *secy; u64 csid; macsec = macsec_priv(dev); secy = &macsec->secy; tx_sc = &secy->tx_sc; switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256; break; default: goto nla_put_failure; } if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci, IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ICV_LEN, secy->icv_len) || nla_put_u64_64bit(skb, IFLA_MACSEC_CIPHER_SUITE, csid, IFLA_MACSEC_PAD) || nla_put_u8(skb, IFLA_MACSEC_ENCODING_SA, tx_sc->encoding_sa) || nla_put_u8(skb, IFLA_MACSEC_ENCRYPT, tx_sc->encrypt) || nla_put_u8(skb, IFLA_MACSEC_PROTECT, secy->protect_frames) || nla_put_u8(skb, IFLA_MACSEC_INC_SCI, tx_sc->send_sci) || nla_put_u8(skb, IFLA_MACSEC_ES, tx_sc->end_station) || nla_put_u8(skb, IFLA_MACSEC_SCB, tx_sc->scb) || nla_put_u8(skb, IFLA_MACSEC_REPLAY_PROTECT, secy->replay_protect) || nla_put_u8(skb, IFLA_MACSEC_VALIDATION, secy->validate_frames) || nla_put_u8(skb, IFLA_MACSEC_OFFLOAD, macsec->offload) || 0) goto nla_put_failure; if (secy->replay_protect) { if (nla_put_u32(skb, IFLA_MACSEC_WINDOW, secy->replay_window)) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops macsec_link_ops __read_mostly = { .kind = "macsec", .priv_size = sizeof(struct macsec_dev), .maxtype = IFLA_MACSEC_MAX, .policy = macsec_rtnl_policy, .setup = macsec_setup, .validate = macsec_validate_attr, .newlink = macsec_newlink, .changelink = macsec_changelink, .dellink = macsec_dellink, .get_size = macsec_get_size, .fill_info = macsec_fill_info, .get_link_net = macsec_get_link_net, }; static bool is_macsec_master(struct net_device *dev) { return rcu_access_pointer(dev->rx_handler) == macsec_handle_frame; } static int macsec_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *real_dev = netdev_notifier_info_to_dev(ptr); struct macsec_rxh_data *rxd; struct macsec_dev *m, *n; LIST_HEAD(head); if (!is_macsec_master(real_dev)) return NOTIFY_DONE; rxd = macsec_data_rtnl(real_dev); switch (event) { case NETDEV_DOWN: case NETDEV_UP: case NETDEV_CHANGE: list_for_each_entry_safe(m, n, &rxd->secys, secys) { struct net_device *dev = m->secy.netdev; netif_stacked_transfer_operstate(real_dev, dev); } break; case NETDEV_UNREGISTER: list_for_each_entry_safe(m, n, &rxd->secys, secys) { macsec_common_dellink(m->secy.netdev, &head); } netdev_rx_handler_unregister(real_dev); kfree(rxd); unregister_netdevice_many(&head); break; case NETDEV_CHANGEMTU: list_for_each_entry(m, &rxd->secys, secys) { struct net_device *dev = m->secy.netdev; unsigned int mtu = real_dev->mtu - (m->secy.icv_len + macsec_extra_len(true)); if (dev->mtu > mtu) dev_set_mtu(dev, mtu); } break; case NETDEV_FEAT_CHANGE: list_for_each_entry(m, &rxd->secys, secys) { macsec_inherit_tso_max(m->secy.netdev); netdev_update_features(m->secy.netdev); } break; } return NOTIFY_OK; } static struct notifier_block macsec_notifier = { .notifier_call = macsec_notify, }; static int __init macsec_init(void) { int err; pr_info("MACsec IEEE 802.1AE\n"); err = register_netdevice_notifier(&macsec_notifier); if (err) return err; err = rtnl_link_register(&macsec_link_ops); if (err) goto notifier; err = genl_register_family(&macsec_fam); if (err) goto rtnl; return 0; rtnl: rtnl_link_unregister(&macsec_link_ops); notifier: unregister_netdevice_notifier(&macsec_notifier); return err; } static void __exit macsec_exit(void) { genl_unregister_family(&macsec_fam); rtnl_link_unregister(&macsec_link_ops); unregister_netdevice_notifier(&macsec_notifier); rcu_barrier(); } module_init(macsec_init); module_exit(macsec_exit); MODULE_ALIAS_RTNL_LINK("macsec"); MODULE_ALIAS_GENL_FAMILY("macsec"); MODULE_DESCRIPTION("MACsec IEEE 802.1AE"); MODULE_LICENSE("GPL v2"); |
| 65 64 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/core/netclassid_cgroup.c Classid Cgroupfs Handling * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fdtable.h> #include <linux/sched/task.h> #include <net/cls_cgroup.h> #include <net/sock.h> static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) { return css ? container_of(css, struct cgroup_cls_state, css) : NULL; } struct cgroup_cls_state *task_cls_state(struct task_struct *p) { return css_cls_state(task_css_check(p, net_cls_cgrp_id, rcu_read_lock_held() || rcu_read_lock_bh_held() || rcu_read_lock_trace_held())); } EXPORT_SYMBOL_GPL(task_cls_state); static struct cgroup_subsys_state * cgrp_css_alloc(struct cgroup_subsys_state *parent_css) { struct cgroup_cls_state *cs; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); return &cs->css; } static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_cls_state *cs = css_cls_state(css); struct cgroup_cls_state *parent = css_cls_state(css->parent); if (parent) cs->classid = parent->classid; return 0; } static void cgrp_css_free(struct cgroup_subsys_state *css) { kfree(css_cls_state(css)); } /* * To avoid freezing of sockets creation for tasks with big number of threads * and opened sockets lets release file_lock every 1000 iterated descriptors. * New sockets will already have been created with new classid. */ struct update_classid_context { u32 classid; unsigned int batch; }; #define UPDATE_CLASSID_BATCH 1000 static int update_classid_sock(const void *v, struct file *file, unsigned int n) { struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file); if (sock) sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); if (--ctx->batch == 0) { ctx->batch = UPDATE_CLASSID_BATCH; return n + 1; } return 0; } static void update_classid_task(struct task_struct *p, u32 classid) { struct update_classid_context ctx = { .classid = classid, .batch = UPDATE_CLASSID_BATCH }; unsigned int fd = 0; /* Only update the leader task, when many threads in this task, * so it can avoid the useless traversal. */ if (p != p->group_leader) return; do { task_lock(p); fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); task_unlock(p); cond_resched(); } while (fd); } static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { update_classid_task(p, css_cls_state(css)->classid); } } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) { return css_cls_state(css)->classid; } static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { struct cgroup_cls_state *cs = css_cls_state(css); struct css_task_iter it; struct task_struct *p; cs->classid = (u32)value; css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) update_classid_task(p, cs->classid); css_task_iter_end(&it); return 0; } static struct cftype ss_files[] = { { .name = "classid", .read_u64 = read_classid, .write_u64 = write_classid, }, { } /* terminate */ }; struct cgroup_subsys net_cls_cgrp_subsys = { .css_alloc = cgrp_css_alloc, .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = cgrp_attach, .legacy_cftypes = ss_files, }; |
| 3 1 1 1 1 2 2 2 2 2 2 2 2 4 4 4 2 2 2 1 1 1 3 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | // SPDX-License-Identifier: GPL-2.0-or-later /* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. * * Copyright (C) 2013 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> * * Based on: * net/ipv4/xfrm4_protocol.c */ #include <linux/init.h> #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; static DEFINE_MUTEX(xfrm6_protocol_mutex); static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) { switch (protocol) { case IPPROTO_ESP: return &esp6_handlers; case IPPROTO_AH: return &ah6_handlers; case IPPROTO_COMP: return &ipcomp6_handlers; } return NULL; } #define for_each_protocol_rcu(head, handler) \ for (handler = rcu_dereference(head); \ handler != NULL; \ handler = rcu_dereference(handler->next)) \ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; struct xfrm6_protocol __rcu **head = proto_handlers(protocol); if (!head) return 0; for_each_protocol_rcu(*proto_handlers(protocol), handler) if ((ret = handler->cb_handler(skb, err)) <= 0) return ret; return 0; } int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { int ret; struct xfrm6_protocol *handler; struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); if (!head) goto out; if (!skb_dst(skb)) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowlabel = ip6_flowinfo(ip6h), .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); if (dst->error) goto drop; skb_dst_set(skb, dst); } for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); return 0; } EXPORT_SYMBOL(xfrm6_rcv_encap); static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(esp6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(esp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static int xfrm6_ah_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(ah6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ah6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static int xfrm6_ipcomp_rcv(struct sk_buff *skb) { int ret; struct xfrm6_protocol *handler; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; for_each_protocol_rcu(ipcomp6_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) return ret; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; } static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct xfrm6_protocol *handler; for_each_protocol_rcu(ipcomp6_handlers, handler) if (!handler->err_handler(skb, opt, type, code, offset, info)) return 0; return -ENOENT; } static const struct inet6_protocol esp6_protocol = { .handler = xfrm6_esp_rcv, .err_handler = xfrm6_esp_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol ah6_protocol = { .handler = xfrm6_ah_rcv, .err_handler = xfrm6_ah_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol ipcomp6_protocol = { .handler = xfrm6_ipcomp_rcv, .err_handler = xfrm6_ipcomp_err, .flags = INET6_PROTO_NOPOLICY, }; static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, .callback = xfrm6_rcv_cb, }; static inline const struct inet6_protocol *netproto(unsigned char protocol) { switch (protocol) { case IPPROTO_ESP: return &esp6_protocol; case IPPROTO_AH: return &ah6_protocol; case IPPROTO_COMP: return &ipcomp6_protocol; } return NULL; } int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol) { struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; bool add_netproto = false; int ret = -EEXIST; int priority = handler->priority; if (!proto_handlers(protocol) || !netproto(protocol)) return -EINVAL; mutex_lock(&xfrm6_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), lockdep_is_held(&xfrm6_protocol_mutex))) add_netproto = true; for (pprev = proto_handlers(protocol); (t = rcu_dereference_protected(*pprev, lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; pprev = &t->next) { if (t->priority < priority) break; if (t->priority == priority) goto err; } handler->next = *pprev; rcu_assign_pointer(*pprev, handler); ret = 0; err: mutex_unlock(&xfrm6_protocol_mutex); if (add_netproto) { if (inet6_add_protocol(netproto(protocol), protocol)) { pr_err("%s: can't add protocol\n", __func__); ret = -EAGAIN; } } return ret; } EXPORT_SYMBOL(xfrm6_protocol_register); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol) { struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; int ret = -ENOENT; if (!proto_handlers(protocol) || !netproto(protocol)) return -EINVAL; mutex_lock(&xfrm6_protocol_mutex); for (pprev = proto_handlers(protocol); (t = rcu_dereference_protected(*pprev, lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; pprev = &t->next) { if (t == handler) { *pprev = handler->next; ret = 0; break; } } if (!rcu_dereference_protected(*proto_handlers(protocol), lockdep_is_held(&xfrm6_protocol_mutex))) { if (inet6_del_protocol(netproto(protocol), protocol) < 0) { pr_err("%s: can't remove protocol\n", __func__); ret = -EAGAIN; } } mutex_unlock(&xfrm6_protocol_mutex); synchronize_net(); return ret; } EXPORT_SYMBOL(xfrm6_protocol_deregister); int __init xfrm6_protocol_init(void) { return xfrm_input_register_afinfo(&xfrm6_input_afinfo); } void xfrm6_protocol_fini(void) { xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); } |
| 44152 51555 3 4197 41 51568 51637 51561 51555 51515 4221 4221 13 13 13 13 13 11 11 11 11 13 4 10 6 5 7 11 2 13 13 13 13 13 13 13 13 13 13 12 2 10 13 52 49 49 52 49 49 2 1 44151 44145 44146 44151 44145 1 4 3 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 | // SPDX-License-Identifier: GPL-2.0-only #include "cgroup-internal.h" #include <linux/sched/cputime.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <trace/events/cgroup.h> static DEFINE_SPINLOCK(rstat_base_lock); static DEFINE_PER_CPU(struct llist_head, rstat_backlog_list); static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu); /* * Determines whether a given css can participate in rstat. * css's that are cgroup::self use rstat for base stats. * Other css's associated with a subsystem use rstat only when * they define the ss->css_rstat_flush callback. */ static inline bool css_uses_rstat(struct cgroup_subsys_state *css) { return css_is_self(css) || css->ss->css_rstat_flush != NULL; } static struct css_rstat_cpu *css_rstat_cpu( struct cgroup_subsys_state *css, int cpu) { return per_cpu_ptr(css->rstat_cpu, cpu); } static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu( struct cgroup *cgrp, int cpu) { return per_cpu_ptr(cgrp->rstat_base_cpu, cpu); } static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss) { if (ss) return &ss->rstat_ss_lock; return &rstat_base_lock; } static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu) { if (ss) return per_cpu_ptr(ss->lhead, cpu); return per_cpu_ptr(&rstat_backlog_list, cpu); } /** * css_rstat_updated - keep track of updated rstat_cpu * @css: target cgroup subsystem state * @cpu: cpu on which rstat_cpu was updated * * Atomically inserts the css in the ss's llist for the given cpu. This is * reentrant safe i.e. safe against softirq, hardirq and nmi. The ss's llist * will be processed at the flush time to create the update tree. * * NOTE: if the user needs the guarantee that the updater either add itself in * the lockless list or the concurrent flusher flushes its updated stats, a * memory barrier is needed before the call to css_rstat_updated() i.e. a * barrier after updating the per-cpu stats and before calling * css_rstat_updated(). */ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) { struct llist_head *lhead; struct css_rstat_cpu *rstatc; struct css_rstat_cpu __percpu *rstatc_pcpu; struct llist_node *self; /* * Since bpf programs can call this function, prevent access to * uninitialized rstat pointers. */ if (!css_uses_rstat(css)) return; lockdep_assert_preemption_disabled(); /* * For archs withnot nmi safe cmpxchg or percpu ops support, ignore * the requests from nmi context. */ if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) || !IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi()) return; rstatc = css_rstat_cpu(css, cpu); /* * If already on list return. This check is racy and smp_mb() is needed * to pair it with the smp_mb() in css_process_update_tree() if the * guarantee that the updated stats are visible to concurrent flusher is * needed. */ if (llist_on_list(&rstatc->lnode)) return; /* * This function can be renentered by irqs and nmis for the same cgroup * and may try to insert the same per-cpu lnode into the llist. Note * that llist_add() does not protect against such scenarios. * * To protect against such stacked contexts of irqs/nmis, we use the * fact that lnode points to itself when not on a list and then use * this_cpu_cmpxchg() to atomically set to NULL to select the winner * which will call llist_add(). The losers can assume the insertion is * successful and the winner will eventually add the per-cpu lnode to * the llist. */ self = &rstatc->lnode; rstatc_pcpu = css->rstat_cpu; if (this_cpu_cmpxchg(rstatc_pcpu->lnode.next, self, NULL) != self) return; lhead = ss_lhead_cpu(css->ss, cpu); llist_add(&rstatc->lnode, lhead); } static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu) { /* put @css and all ancestors on the corresponding updated lists */ while (true) { struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu); struct cgroup_subsys_state *parent = css->parent; struct css_rstat_cpu *prstatc; /* * Both additions and removals are bottom-up. If a cgroup * is already in the tree, all ancestors are. */ if (rstatc->updated_next) break; /* Root has no parent to link it to, but mark it busy */ if (!parent) { rstatc->updated_next = css; break; } prstatc = css_rstat_cpu(parent, cpu); rstatc->updated_next = prstatc->updated_children; prstatc->updated_children = css; css = parent; } } static void css_process_update_tree(struct cgroup_subsys *ss, int cpu) { struct llist_head *lhead = ss_lhead_cpu(ss, cpu); struct llist_node *lnode; while ((lnode = llist_del_first_init(lhead))) { struct css_rstat_cpu *rstatc; /* * smp_mb() is needed here (more specifically in between * init_llist_node() and per-cpu stats flushing) if the * guarantee is required by a rstat user where etiher the * updater should add itself on the lockless list or the * flusher flush the stats updated by the updater who have * observed that they are already on the list. The * corresponding barrier pair for this one should be before * css_rstat_updated() by the user. * * For now, there aren't any such user, so not adding the * barrier here but if such a use-case arise, please add * smp_mb() here. */ rstatc = container_of(lnode, struct css_rstat_cpu, lnode); __css_process_update_tree(rstatc->owner, cpu); } } /** * css_rstat_push_children - push children css's into the given list * @head: current head of the list (= subtree root) * @child: first child of the root * @cpu: target cpu * Return: A new singly linked list of css's to be flushed * * Iteratively traverse down the css_rstat_cpu updated tree level by * level and push all the parents first before their next level children * into a singly linked list via the rstat_flush_next pointer built from the * tail backward like "pushing" css's into a stack. The root is pushed by * the caller. */ static struct cgroup_subsys_state *css_rstat_push_children( struct cgroup_subsys_state *head, struct cgroup_subsys_state *child, int cpu) { struct cgroup_subsys_state *cnext = child; /* Next head of child css level */ struct cgroup_subsys_state *ghead = NULL; /* Head of grandchild css level */ struct cgroup_subsys_state *parent, *grandchild; struct css_rstat_cpu *crstatc; child->rstat_flush_next = NULL; /* * The subsystem rstat lock must be held for the whole duration from * here as the rstat_flush_next list is being constructed to when * it is consumed later in css_rstat_flush(). */ lockdep_assert_held(ss_rstat_lock(head->ss)); /* * Notation: -> updated_next pointer * => rstat_flush_next pointer * * Assuming the following sample updated_children lists: * P: C1 -> C2 -> P * C1: G11 -> G12 -> C1 * C2: G21 -> G22 -> C2 * * After 1st iteration: * head => C2 => C1 => NULL * ghead => G21 => G11 => NULL * * After 2nd iteration: * head => G12 => G11 => G22 => G21 => C2 => C1 => NULL */ next_level: while (cnext) { child = cnext; cnext = child->rstat_flush_next; parent = child->parent; /* updated_next is parent cgroup terminated if !NULL */ while (child != parent) { child->rstat_flush_next = head; head = child; crstatc = css_rstat_cpu(child, cpu); grandchild = crstatc->updated_children; if (grandchild != child) { /* Push the grand child to the next level */ crstatc->updated_children = child; grandchild->rstat_flush_next = ghead; ghead = grandchild; } child = crstatc->updated_next; crstatc->updated_next = NULL; } } if (ghead) { cnext = ghead; ghead = NULL; goto next_level; } return head; } /** * css_rstat_updated_list - build a list of updated css's to be flushed * @root: root of the css subtree to traverse * @cpu: target cpu * Return: A singly linked list of css's to be flushed * * Walks the updated rstat_cpu tree on @cpu from @root. During traversal, * each returned css is unlinked from the updated tree. * * The only ordering guarantee is that, for a parent and a child pair * covered by a given traversal, the child is before its parent in * the list. * * Note that updated_children is self terminated and points to a list of * child css's if not empty. Whereas updated_next is like a sibling link * within the children list and terminated by the parent css. An exception * here is the css root whose updated_next can be self terminated. */ static struct cgroup_subsys_state *css_rstat_updated_list( struct cgroup_subsys_state *root, int cpu) { struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu); struct cgroup_subsys_state *head = NULL, *parent, *child; css_process_update_tree(root->ss, cpu); /* Return NULL if this subtree is not on-list */ if (!rstatc->updated_next) return NULL; /* * Unlink @root from its parent. As the updated_children list is * singly linked, we have to walk it to find the removal point. */ parent = root->parent; if (parent) { struct css_rstat_cpu *prstatc; struct cgroup_subsys_state **nextp; prstatc = css_rstat_cpu(parent, cpu); nextp = &prstatc->updated_children; while (*nextp != root) { struct css_rstat_cpu *nrstatc; nrstatc = css_rstat_cpu(*nextp, cpu); WARN_ON_ONCE(*nextp == parent); nextp = &nrstatc->updated_next; } *nextp = rstatc->updated_next; } rstatc->updated_next = NULL; /* Push @root to the list first before pushing the children */ head = root; root->rstat_flush_next = NULL; child = rstatc->updated_children; rstatc->updated_children = root; if (child != root) head = css_rstat_push_children(head, child, cpu); return head; } /* * A hook for bpf stat collectors to attach to and flush their stats. * Together with providing bpf kfuncs for css_rstat_updated() and * css_rstat_flush(), this enables a complete workflow where bpf progs that * collect cgroup stats can integrate with rstat for efficient flushing. * * A static noinline declaration here could cause the compiler to optimize away * the function. A global noinline declaration will keep the definition, but may * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. */ __bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } __bpf_hook_end(); /* * Helper functions for locking. * * This makes it easier to diagnose locking issues and contention in * production environments. The parameter @cpu_in_loop indicate lock * was released and re-taken when collection data from the CPUs. The * value -1 is used when obtaining the main lock else this is the CPU * number processed last. */ static inline void __css_rstat_lock(struct cgroup_subsys_state *css, int cpu_in_loop) __acquires(ss_rstat_lock(css->ss)) { struct cgroup *cgrp = css->cgroup; spinlock_t *lock; bool contended; lock = ss_rstat_lock(css->ss); contended = !spin_trylock_irq(lock); if (contended) { trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended); spin_lock_irq(lock); } trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended); } static inline void __css_rstat_unlock(struct cgroup_subsys_state *css, int cpu_in_loop) __releases(ss_rstat_lock(css->ss)) { struct cgroup *cgrp = css->cgroup; spinlock_t *lock; lock = ss_rstat_lock(css->ss); trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false); spin_unlock_irq(lock); } /** * css_rstat_flush - flush stats in @css's rstat subtree * @css: target cgroup subsystem state * * Collect all per-cpu stats in @css's subtree into the global counters * and propagate them upwards. After this function returns, all rstat * nodes in the subtree have up-to-date ->stat. * * This also gets all rstat nodes in the subtree including @css off the * ->updated_children lists. * * This function may block. */ __bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css) { int cpu; bool is_self = css_is_self(css); /* * Since bpf programs can call this function, prevent access to * uninitialized rstat pointers. */ if (!css_uses_rstat(css)) return; might_sleep(); for_each_possible_cpu(cpu) { struct cgroup_subsys_state *pos; /* Reacquire for each CPU to avoid disabling IRQs too long */ __css_rstat_lock(css, cpu); pos = css_rstat_updated_list(css, cpu); for (; pos; pos = pos->rstat_flush_next) { if (is_self) { cgroup_base_stat_flush(pos->cgroup, cpu); bpf_rstat_flush(pos->cgroup, cgroup_parent(pos->cgroup), cpu); } else pos->ss->css_rstat_flush(pos, cpu); } __css_rstat_unlock(css, cpu); if (!cond_resched()) cpu_relax(); } } int css_rstat_init(struct cgroup_subsys_state *css) { struct cgroup *cgrp = css->cgroup; int cpu; bool is_self = css_is_self(css); if (is_self) { /* the root cgrp has rstat_base_cpu preallocated */ if (!cgrp->rstat_base_cpu) { cgrp->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu); if (!cgrp->rstat_base_cpu) return -ENOMEM; } } else if (css->ss->css_rstat_flush == NULL) return 0; /* the root cgrp's self css has rstat_cpu preallocated */ if (!css->rstat_cpu) { css->rstat_cpu = alloc_percpu(struct css_rstat_cpu); if (!css->rstat_cpu) { if (is_self) free_percpu(cgrp->rstat_base_cpu); return -ENOMEM; } } /* ->updated_children list is self terminated */ for_each_possible_cpu(cpu) { struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu); rstatc->owner = rstatc->updated_children = css; init_llist_node(&rstatc->lnode); if (is_self) { struct cgroup_rstat_base_cpu *rstatbc; rstatbc = cgroup_rstat_base_cpu(cgrp, cpu); u64_stats_init(&rstatbc->bsync); } } return 0; } void css_rstat_exit(struct cgroup_subsys_state *css) { int cpu; if (!css_uses_rstat(css)) return; if (!css->rstat_cpu) return; css_rstat_flush(css); /* sanity check */ for_each_possible_cpu(cpu) { struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu); if (WARN_ON_ONCE(rstatc->updated_children != css) || WARN_ON_ONCE(rstatc->updated_next)) return; } if (css_is_self(css)) { struct cgroup *cgrp = css->cgroup; free_percpu(cgrp->rstat_base_cpu); cgrp->rstat_base_cpu = NULL; } free_percpu(css->rstat_cpu); css->rstat_cpu = NULL; } /** * ss_rstat_init - subsystem-specific rstat initialization * @ss: target subsystem * * If @ss is NULL, the static locks associated with the base stats * are initialized. If @ss is non-NULL, the subsystem-specific locks * are initialized. */ int __init ss_rstat_init(struct cgroup_subsys *ss) { int cpu; if (ss) { ss->lhead = alloc_percpu(struct llist_head); if (!ss->lhead) return -ENOMEM; } spin_lock_init(ss_rstat_lock(ss)); for_each_possible_cpu(cpu) init_llist_head(ss_lhead_cpu(ss, cpu)); return 0; } /* * Functions for cgroup basic resource statistics implemented on top of * rstat. */ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { dst_bstat->cputime.utime += src_bstat->cputime.utime; dst_bstat->cputime.stime += src_bstat->cputime.stime; dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime; #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum += src_bstat->forceidle_sum; #endif dst_bstat->ntime += src_bstat->ntime; } static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { dst_bstat->cputime.utime -= src_bstat->cputime.utime; dst_bstat->cputime.stime -= src_bstat->cputime.stime; dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime; #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum -= src_bstat->forceidle_sum; #endif dst_bstat->ntime -= src_bstat->ntime; } static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) { struct cgroup_rstat_base_cpu *rstatbc = cgroup_rstat_base_cpu(cgrp, cpu); struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_rstat_base_cpu *prstatbc; struct cgroup_base_stat delta; unsigned seq; /* Root-level stats are sourced from system-wide CPU stats */ if (!parent) return; /* fetch the current per-cpu values */ do { seq = __u64_stats_fetch_begin(&rstatbc->bsync); delta = rstatbc->bstat; } while (__u64_stats_fetch_retry(&rstatbc->bsync, seq)); /* propagate per-cpu delta to cgroup and per-cpu global statistics */ cgroup_base_stat_sub(&delta, &rstatbc->last_bstat); cgroup_base_stat_add(&cgrp->bstat, &delta); cgroup_base_stat_add(&rstatbc->last_bstat, &delta); cgroup_base_stat_add(&rstatbc->subtree_bstat, &delta); /* propagate cgroup and per-cpu global delta to parent (unless that's root) */ if (cgroup_parent(parent)) { delta = cgrp->bstat; cgroup_base_stat_sub(&delta, &cgrp->last_bstat); cgroup_base_stat_add(&parent->bstat, &delta); cgroup_base_stat_add(&cgrp->last_bstat, &delta); delta = rstatbc->subtree_bstat; prstatbc = cgroup_rstat_base_cpu(parent, cpu); cgroup_base_stat_sub(&delta, &rstatbc->last_subtree_bstat); cgroup_base_stat_add(&prstatbc->subtree_bstat, &delta); cgroup_base_stat_add(&rstatbc->last_subtree_bstat, &delta); } } static struct cgroup_rstat_base_cpu * cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { struct cgroup_rstat_base_cpu *rstatbc; rstatbc = get_cpu_ptr(cgrp->rstat_base_cpu); *flags = u64_stats_update_begin_irqsave(&rstatbc->bsync); return rstatbc; } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, struct cgroup_rstat_base_cpu *rstatbc, unsigned long flags) { u64_stats_update_end_irqrestore(&rstatbc->bsync, flags); css_rstat_updated(&cgrp->self, smp_processor_id()); put_cpu_ptr(rstatbc); } void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { struct cgroup_rstat_base_cpu *rstatbc; unsigned long flags; rstatbc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); rstatbc->bstat.cputime.sum_exec_runtime += delta_exec; cgroup_base_stat_cputime_account_end(cgrp, rstatbc, flags); } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup_rstat_base_cpu *rstatbc; unsigned long flags; rstatbc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { case CPUTIME_NICE: rstatbc->bstat.ntime += delta_exec; fallthrough; case CPUTIME_USER: rstatbc->bstat.cputime.utime += delta_exec; break; case CPUTIME_SYSTEM: case CPUTIME_IRQ: case CPUTIME_SOFTIRQ: rstatbc->bstat.cputime.stime += delta_exec; break; #ifdef CONFIG_SCHED_CORE case CPUTIME_FORCEIDLE: rstatbc->bstat.forceidle_sum += delta_exec; break; #endif default: break; } cgroup_base_stat_cputime_account_end(cgrp, rstatbc, flags); } /* * compute the cputime for the root cgroup by getting the per cpu data * at a global level, then categorizing the fields in a manner consistent * with how it is done by __cgroup_account_cputime_field for each bit of * cpu time attributed to a cgroup. */ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) { struct task_cputime *cputime = &bstat->cputime; int i; memset(bstat, 0, sizeof(*bstat)); for_each_possible_cpu(i) { struct kernel_cpustat kcpustat; u64 *cpustat = kcpustat.cpustat; u64 user = 0; u64 sys = 0; kcpustat_cpu_fetch(&kcpustat, i); user += cpustat[CPUTIME_USER]; user += cpustat[CPUTIME_NICE]; cputime->utime += user; sys += cpustat[CPUTIME_SYSTEM]; sys += cpustat[CPUTIME_IRQ]; sys += cpustat[CPUTIME_SOFTIRQ]; cputime->stime += sys; cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; #endif bstat->ntime += cpustat[CPUTIME_NICE]; } } static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat *bstat) { #ifdef CONFIG_SCHED_CORE u64 forceidle_time = bstat->forceidle_sum; do_div(forceidle_time, NSEC_PER_USEC); seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time); #endif } void cgroup_base_stat_cputime_show(struct seq_file *seq) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct cgroup_base_stat bstat; if (cgroup_parent(cgrp)) { css_rstat_flush(&cgrp->self); __css_rstat_lock(&cgrp->self, -1); bstat = cgrp->bstat; cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &bstat.cputime.utime, &bstat.cputime.stime); __css_rstat_unlock(&cgrp->self, -1); } else { root_cgroup_cputime(&bstat); } do_div(bstat.cputime.sum_exec_runtime, NSEC_PER_USEC); do_div(bstat.cputime.utime, NSEC_PER_USEC); do_div(bstat.cputime.stime, NSEC_PER_USEC); do_div(bstat.ntime, NSEC_PER_USEC); seq_printf(seq, "usage_usec %llu\n" "user_usec %llu\n" "system_usec %llu\n" "nice_usec %llu\n", bstat.cputime.sum_exec_runtime, bstat.cputime.utime, bstat.cputime.stime, bstat.ntime); cgroup_force_idle_show(seq, &bstat); } /* Add bpf kfuncs for css_rstat_updated() and css_rstat_flush() */ BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, css_rstat_updated) BTF_ID_FLAGS(func, css_rstat_flush, KF_SLEEPABLE) BTF_KFUNCS_END(bpf_rstat_kfunc_ids) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_rstat_kfunc_ids, }; static int __init bpf_rstat_kfunc_init(void) { return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_rstat_kfunc_set); } late_initcall(bpf_rstat_kfunc_init); |
| 4 4 4 4 4 81 80 6 6 6 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 1 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 2 3 3 3 3 2 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 71 56 56 56 56 56 56 56 56 56 56 52 5 76 77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> * Copyright (C) 2019-2022 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/debugfs.h> #include <linux/random.h> #include <linux/moduleparam.h> #include <linux/ieee80211.h> #include <linux/minmax.h> #include <net/mac80211.h> #include "rate.h" #include "sta_info.h" #include "rc80211_minstrel_ht.h" #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) /* Number of symbols for a packet with (bps) bits per symbol */ #define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) /* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ (sgi ? \ ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \ ((syms) * 1000) << 2 /* syms * 4 us */ \ ) /* Transmit duration for the raw data part of an average sized packet */ #define MCS_DURATION(streams, sgi, bps) \ (MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) / AVG_AMPDU_SIZE) #define BW_20 0 #define BW_40 1 #define BW_80 2 /* * Define group sort order: HT40 -> SGI -> #streams */ #define GROUP_IDX(_streams, _sgi, _ht40) \ MINSTREL_HT_GROUP_0 + \ MINSTREL_MAX_STREAMS * 2 * _ht40 + \ MINSTREL_MAX_STREAMS * _sgi + \ _streams - 1 #define _MAX(a, b) (((a)>(b))?(a):(b)) #define GROUP_SHIFT(duration) \ _MAX(0, 16 - __builtin_clz(duration)) /* MCS rate information for an MCS group */ #define __MCS_GROUP(_streams, _sgi, _ht40, _s) \ [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .shift = _s, \ .bw = _ht40, \ .flags = \ IEEE80211_TX_RC_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ .duration = { \ MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234) >> _s, \ MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) >> _s \ } \ } #define MCS_GROUP_SHIFT(_streams, _sgi, _ht40) \ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26)) #define MCS_GROUP(_streams, _sgi, _ht40) \ __MCS_GROUP(_streams, _sgi, _ht40, \ MCS_GROUP_SHIFT(_streams, _sgi, _ht40)) #define VHT_GROUP_IDX(_streams, _sgi, _bw) \ (MINSTREL_VHT_GROUP_0 + \ MINSTREL_MAX_STREAMS * 2 * (_bw) + \ MINSTREL_MAX_STREAMS * (_sgi) + \ (_streams) - 1) #define BW2VBPS(_bw, r3, r2, r1) \ (_bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1) #define __VHT_GROUP(_streams, _sgi, _bw, _s) \ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \ .streams = _streams, \ .shift = _s, \ .bw = _bw, \ .flags = \ IEEE80211_TX_RC_VHT_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ (_bw == BW_80 ? IEEE80211_TX_RC_80_MHZ_WIDTH : \ _bw == BW_40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ .duration = { \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 117, 54, 26)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 234, 108, 52)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 351, 162, 78)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 468, 216, 104)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 702, 324, 156)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 936, 432, 208)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1053, 486, 234)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1170, 540, 260)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1404, 648, 312)) >> _s, \ MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 1560, 720, 346)) >> _s \ } \ } #define VHT_GROUP_SHIFT(_streams, _sgi, _bw) \ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, \ BW2VBPS(_bw, 117, 54, 26))) #define VHT_GROUP(_streams, _sgi, _bw) \ __VHT_GROUP(_streams, _sgi, _bw, \ VHT_GROUP_SHIFT(_streams, _sgi, _bw)) #define CCK_DURATION(_bitrate, _short) \ (1000 * (10 /* SIFS */ + \ (_short ? 72 + 24 : 144 + 48) + \ (8 * (AVG_PKT_SIZE + 4) * 10) / (_bitrate))) #define CCK_DURATION_LIST(_short, _s) \ CCK_DURATION(10, _short) >> _s, \ CCK_DURATION(20, _short) >> _s, \ CCK_DURATION(55, _short) >> _s, \ CCK_DURATION(110, _short) >> _s #define __CCK_GROUP(_s) \ [MINSTREL_CCK_GROUP] = { \ .streams = 1, \ .flags = 0, \ .shift = _s, \ .duration = { \ CCK_DURATION_LIST(false, _s), \ CCK_DURATION_LIST(true, _s) \ } \ } #define CCK_GROUP_SHIFT \ GROUP_SHIFT(CCK_DURATION(10, false)) #define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT) #define OFDM_DURATION(_bitrate) \ (1000 * (16 /* SIFS + signal ext */ + \ 16 /* T_PREAMBLE */ + \ 4 /* T_SIGNAL */ + \ 4 * (((16 + 80 * (AVG_PKT_SIZE + 4) + 6) / \ ((_bitrate) * 4))))) #define OFDM_DURATION_LIST(_s) \ OFDM_DURATION(60) >> _s, \ OFDM_DURATION(90) >> _s, \ OFDM_DURATION(120) >> _s, \ OFDM_DURATION(180) >> _s, \ OFDM_DURATION(240) >> _s, \ OFDM_DURATION(360) >> _s, \ OFDM_DURATION(480) >> _s, \ OFDM_DURATION(540) >> _s #define __OFDM_GROUP(_s) \ [MINSTREL_OFDM_GROUP] = { \ .streams = 1, \ .flags = 0, \ .shift = _s, \ .duration = { \ OFDM_DURATION_LIST(_s), \ } \ } #define OFDM_GROUP_SHIFT \ GROUP_SHIFT(OFDM_DURATION(60)) #define OFDM_GROUP __OFDM_GROUP(OFDM_GROUP_SHIFT) static bool minstrel_vht_only = true; module_param(minstrel_vht_only, bool, 0644); MODULE_PARM_DESC(minstrel_vht_only, "Use only VHT rates when VHT is supported by sta."); /* * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they * use. * * Sortorder has to be fixed for GROUP_IDX macro to be applicable: * BW -> SGI -> #streams */ const struct mcs_group minstrel_mcs_groups[] = { MCS_GROUP(1, 0, BW_20), MCS_GROUP(2, 0, BW_20), MCS_GROUP(3, 0, BW_20), MCS_GROUP(4, 0, BW_20), MCS_GROUP(1, 1, BW_20), MCS_GROUP(2, 1, BW_20), MCS_GROUP(3, 1, BW_20), MCS_GROUP(4, 1, BW_20), MCS_GROUP(1, 0, BW_40), MCS_GROUP(2, 0, BW_40), MCS_GROUP(3, 0, BW_40), MCS_GROUP(4, 0, BW_40), MCS_GROUP(1, 1, BW_40), MCS_GROUP(2, 1, BW_40), MCS_GROUP(3, 1, BW_40), MCS_GROUP(4, 1, BW_40), CCK_GROUP, OFDM_GROUP, VHT_GROUP(1, 0, BW_20), VHT_GROUP(2, 0, BW_20), VHT_GROUP(3, 0, BW_20), VHT_GROUP(4, 0, BW_20), VHT_GROUP(1, 1, BW_20), VHT_GROUP(2, 1, BW_20), VHT_GROUP(3, 1, BW_20), VHT_GROUP(4, 1, BW_20), VHT_GROUP(1, 0, BW_40), VHT_GROUP(2, 0, BW_40), VHT_GROUP(3, 0, BW_40), VHT_GROUP(4, 0, BW_40), VHT_GROUP(1, 1, BW_40), VHT_GROUP(2, 1, BW_40), VHT_GROUP(3, 1, BW_40), VHT_GROUP(4, 1, BW_40), VHT_GROUP(1, 0, BW_80), VHT_GROUP(2, 0, BW_80), VHT_GROUP(3, 0, BW_80), VHT_GROUP(4, 0, BW_80), VHT_GROUP(1, 1, BW_80), VHT_GROUP(2, 1, BW_80), VHT_GROUP(3, 1, BW_80), VHT_GROUP(4, 1, BW_80), }; const s16 minstrel_cck_bitrates[4] = { 10, 20, 55, 110 }; const s16 minstrel_ofdm_bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly; static const u8 minstrel_sample_seq[] = { MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_JUMP, MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_JUMP, MINSTREL_SAMPLE_TYPE_INC, MINSTREL_SAMPLE_TYPE_SLOW, }; static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi); /* * Some VHT MCSes are invalid (when Ndbps / Nes is not an integer) * e.g for MCS9@20MHzx1Nss: Ndbps=8x52*(5/6) Nes=1 * * Returns the valid mcs map for struct minstrel_mcs_group_data.supported */ static u16 minstrel_get_valid_vht_rates(int bw, int nss, __le16 mcs_map) { u16 mask = 0; if (bw == BW_20) { if (nss != 3 && nss != 6) mask = BIT(9); } else if (bw == BW_80) { if (nss == 3 || nss == 7) mask = BIT(6); else if (nss == 6) mask = BIT(9); } else { WARN_ON(bw != BW_40); } switch ((le16_to_cpu(mcs_map) >> (2 * (nss - 1))) & 3) { case IEEE80211_VHT_MCS_SUPPORT_0_7: mask |= 0x300; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: mask |= 0x200; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: break; default: mask = 0x3ff; } return 0x3ff & ~mask; } static bool minstrel_ht_is_legacy_group(int group) { return group == MINSTREL_CCK_GROUP || group == MINSTREL_OFDM_GROUP; } /* * Look up an MCS group index based on mac80211 rate information */ static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { return GROUP_IDX((rate->idx / 8) + 1, !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. */ static int minstrel_ht_ri_get_group_idx(struct rate_info *rate) { return GROUP_IDX((rate->mcs / 8) + 1, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40)); } static int minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate) { return VHT_GROUP_IDX(ieee80211_rate_get_vht_nss(rate), !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + 2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)); } /* * Look up an MCS group index based on new cfg80211 rate_info. */ static int minstrel_vht_ri_get_group_idx(struct rate_info *rate) { return VHT_GROUP_IDX(rate->nss, !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI), !!(rate->bw & RATE_INFO_BW_40) + 2*!!(rate->bw & RATE_INFO_BW_80)); } static struct minstrel_rate_stats * minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int group, idx; if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); idx = rate->idx % 8; goto out; } if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { group = minstrel_vht_get_group_idx(rate); idx = ieee80211_rate_get_vht_mcs(rate); goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (!(mi->supported[group] & BIT(idx))) continue; if (rate->idx != mp->cck_rates[idx]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->idx == mp->ofdm_rates[mi->band][idx]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } /* * Get the minstrel rate statistics for specified STA and rate info. */ static struct minstrel_rate_stats * minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int group, idx; struct rate_info *rate = &rate_status->rate_idx; if (rate->flags & RATE_INFO_FLAGS_MCS) { group = minstrel_ht_ri_get_group_idx(rate); idx = rate->mcs % 8; goto out; } if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) { group = minstrel_vht_ri_get_group_idx(rate); idx = rate->mcs; goto out; } group = MINSTREL_CCK_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { if (rate->legacy != minstrel_cck_bitrates[ mp->cck_rates[idx] ]) continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && mi->use_short_preamble) idx += 4; goto out; } group = MINSTREL_OFDM_GROUP; for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) if (rate->legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][idx] ]) goto out; idx = 0; out: return &mi->groups[group].rates[idx]; } static inline struct minstrel_rate_stats * minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) { return &mi->groups[MI_RATE_GROUP(index)].rates[MI_RATE_IDX(index)]; } static inline int minstrel_get_duration(int index) { const struct mcs_group *group = &minstrel_mcs_groups[MI_RATE_GROUP(index)]; unsigned int duration = group->duration[MI_RATE_IDX(index)]; return duration << group->shift; } static unsigned int minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) { int duration; if (mi->avg_ampdu_len) return MINSTREL_TRUNC(mi->avg_ampdu_len); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_tp_rate[0]))) return 1; duration = minstrel_get_duration(mi->max_tp_rate[0]); if (duration > 400 * 1000) return 2; if (duration > 250 * 1000) return 4; if (duration > 150 * 1000) return 8; return 16; } /* * Return current throughput based on the average A-MPDU length, taking into * account the expected number of retransmissions and their expected length */ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, int prob_avg) { unsigned int nsecs = 0, overhead = mi->overhead; unsigned int ampdu_len = 1; /* do not account throughput if success prob is below 10% */ if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; else ampdu_len = minstrel_ht_avg_ampdu_len(mi); nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << minstrel_mcs_groups[group].shift; /* * For the throughput calculation, limit the probability value to 90% to * account for collision related packet error rate fluctuation * (prob is scaled - see MINSTREL_FRAC above) */ if (prob_avg > MINSTREL_FRAC(90, 100)) prob_avg = MINSTREL_FRAC(90, 100); return MINSTREL_TRUNC(100 * ((prob_avg * 1000000) / nsecs)); } /* * Find & sort topmost throughput rates * * If multiple rates provide equal throughput the sorting is based on their * current success probability. Higher success probability is preferred among * MCS groups, CCK rates do not provide aggregation and are therefore at last. */ static void minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, u16 *tp_list) { int cur_group, cur_idx, cur_tp_avg, cur_prob; int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int j = MAX_THR_RATES; cur_group = MI_RATE_GROUP(index); cur_idx = MI_RATE_IDX(index); cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg; cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = MI_RATE_GROUP(tp_list[j - 1]); tmp_idx = MI_RATE_IDX(tp_list[j - 1]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (cur_tp_avg < tmp_tp_avg || (cur_tp_avg == tmp_tp_avg && cur_prob <= tmp_prob)) break; j--; } while (j > 0); if (j < MAX_THR_RATES - 1) { memmove(&tp_list[j + 1], &tp_list[j], (sizeof(*tp_list) * (MAX_THR_RATES - (j + 1)))); } if (j < MAX_THR_RATES) tp_list[j] = index; } /* * Find and set the topmost probability rate per sta and per group */ static void minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 *dest, u16 index) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int max_tp_group, max_tp_idx, max_tp_prob; int cur_tp_avg, cur_group, cur_idx; int max_gpr_group, max_gpr_idx; int max_gpr_tp_avg, max_gpr_prob; cur_group = MI_RATE_GROUP(index); cur_idx = MI_RATE_IDX(index); mg = &mi->groups[cur_group]; mrs = &mg->rates[cur_idx]; tmp_group = MI_RATE_GROUP(*dest); tmp_idx = MI_RATE_IDX(*dest); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ max_tp_group = MI_RATE_GROUP(mi->max_tp_rate[0]); max_tp_idx = MI_RATE_IDX(mi->max_tp_rate[0]); max_tp_prob = mi->groups[max_tp_group].rates[max_tp_idx].prob_avg; if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index)) && !minstrel_ht_is_legacy_group(max_tp_group)) return; /* skip rates faster than max tp rate with lower prob */ if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) && mrs->prob_avg < max_tp_prob) return; max_gpr_group = MI_RATE_GROUP(mg->max_group_prob_rate); max_gpr_idx = MI_RATE_IDX(mg->max_group_prob_rate); max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) { cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) *dest = index; max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, max_gpr_idx, max_gpr_prob); if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { if (mrs->prob_avg > tmp_prob) *dest = index; if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } } /* * Assign new rate set per sta and use CCK rates only if the fastest * rate (max_tp_rate[0]) is from CCK group. This prohibits such sorted * rate sets where MCS and CCK rates are mixed, because CCK rates can * not use aggregation. */ static void minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], u16 tmp_legacy_tp_rate[MAX_THR_RATES]) { unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; tmp_group = MI_RATE_GROUP(tmp_legacy_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_legacy_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = MI_RATE_GROUP(tmp_mcs_tp_rate[0]); tmp_idx = MI_RATE_IDX(tmp_mcs_tp_rate[0]); tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_legacy_tp_rate[i], tmp_mcs_tp_rate); } } } /* * Try to increase robustness of max_prob rate by decrease number of * streams if possible. */ static inline void minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; if (!mi->sta->deflink.ht_cap.ht_supported) return; group = MI_RATE_GROUP(mi->max_tp_rate[0]); tmp_max_streams = minstrel_mcs_groups[group].streams; for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { mg = &mi->groups[group]; if (!mi->supported[group] || group == MINSTREL_CCK_GROUP) continue; tmp_idx = MI_RATE_IDX(mg->max_group_prob_rate); tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg; if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { mi->max_prob_rate = mg->max_group_prob_rate; tmp_tp = minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob); } } } static u16 __minstrel_ht_get_sample_rate(struct minstrel_ht_sta *mi, enum minstrel_sample_type type) { u16 *rates = mi->sample[type].sample_rates; u16 cur; int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { if (!rates[i]) continue; cur = rates[i]; rates[i] = 0; return cur; } return 0; } static inline int minstrel_ewma(int old, int new, int weight) { int diff, incr; diff = new - old; incr = (EWMA_DIV - weight) * diff / EWMA_DIV; return old + incr; } static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) { s32 out_1 = *prev_1; s32 out_2 = *prev_2; s32 val; if (!in) in += 1; if (!out_1) { val = out_1 = in; goto out; } val = MINSTREL_AVG_COEFF1 * in; val += MINSTREL_AVG_COEFF2 * out_1; val += MINSTREL_AVG_COEFF3 * out_2; val >>= MINSTREL_SCALE; if (val > 1 << MINSTREL_SCALE) val = 1 << MINSTREL_SCALE; if (val < 0) val = 1; out: *prev_2 = out_1; *prev_1 = val; return val; } /* * Recalculate statistics and counters of a given rate */ static void minstrel_ht_calc_rate_stats(struct minstrel_priv *mp, struct minstrel_rate_stats *mrs) { unsigned int cur_prob; if (unlikely(mrs->attempts > 0)) { cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); minstrel_filter_avg_add(&mrs->prob_avg, &mrs->prob_avg_1, cur_prob); mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; } mrs->last_success = mrs->success; mrs->last_attempts = mrs->attempts; mrs->success = 0; mrs->attempts = 0; } static bool minstrel_ht_find_sample_rate(struct minstrel_ht_sta *mi, int type, int idx) { int i; for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) { u16 cur = mi->sample[type].sample_rates[i]; if (cur == idx) return true; if (!cur) break; } return false; } static int minstrel_ht_move_sample_rates(struct minstrel_ht_sta *mi, int type, u32 fast_rate_dur, u32 slow_rate_dur) { u16 *rates = mi->sample[type].sample_rates; int i, j; for (i = 0, j = 0; i < MINSTREL_SAMPLE_RATES; i++) { u32 duration; bool valid = false; u16 cur; cur = rates[i]; if (!cur) continue; duration = minstrel_get_duration(cur); switch (type) { case MINSTREL_SAMPLE_TYPE_SLOW: valid = duration > fast_rate_dur && duration < slow_rate_dur; break; case MINSTREL_SAMPLE_TYPE_INC: case MINSTREL_SAMPLE_TYPE_JUMP: valid = duration < fast_rate_dur; break; default: valid = false; break; } if (!valid) { rates[i] = 0; continue; } if (i == j) continue; rates[j++] = cur; rates[i] = 0; } return j; } static int minstrel_ht_group_min_rate_offset(struct minstrel_ht_sta *mi, int group, u32 max_duration) { u16 supported = mi->supported[group]; int i; for (i = 0; i < MCS_GROUP_RATES && supported; i++, supported >>= 1) { if (!(supported & BIT(0))) continue; if (minstrel_get_duration(MI_RATE(group, i)) >= max_duration) continue; return i; } return -1; } /* * Incremental update rates: * Flip through groups and pick the first group rate that is faster than the * highest currently selected rate */ static u16 minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur) { u8 type = MINSTREL_SAMPLE_TYPE_INC; int i, index = 0; u8 group; group = mi->sample[type].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); index = minstrel_ht_group_min_rate_offset(mi, group, fast_rate_dur); if (index < 0) continue; index = MI_RATE(group, index & 0xf); if (!minstrel_ht_find_sample_rate(mi, type, index)) goto out; } index = 0; out: mi->sample[type].sample_group = group; return index; } static int minstrel_ht_next_group_sample_rate(struct minstrel_ht_sta *mi, int group, u16 supported, int offset) { struct minstrel_mcs_group_data *mg = &mi->groups[group]; u16 idx; int i; for (i = 0; i < MCS_GROUP_RATES; i++) { idx = sample_table[mg->column][mg->index]; if (++mg->index >= MCS_GROUP_RATES) { mg->index = 0; if (++mg->column >= ARRAY_SIZE(sample_table)) mg->column = 0; } if (idx < offset) continue; if (!(supported & BIT(idx))) continue; return MI_RATE(group, idx); } return -1; } /* * Jump rates: * Sample random rates, use those that are faster than the highest * currently selected rate. Rates between the fastest and the slowest * get sorted into the slow sample bucket, but only if it has room */ static u16 minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur, u32 slow_rate_dur, int *slow_rate_ofs) { struct minstrel_rate_stats *mrs; u32 max_duration = slow_rate_dur; int i, index, offset; u16 *slow_rates; u16 supported; u32 duration; u8 group; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; slow_rates = mi->sample[MINSTREL_SAMPLE_TYPE_SLOW].sample_rates; group = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group; for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { u8 type; group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups); supported = mi->supported[group]; if (!supported) continue; offset = minstrel_ht_group_min_rate_offset(mi, group, max_duration); if (offset < 0) continue; index = minstrel_ht_next_group_sample_rate(mi, group, supported, offset); if (index < 0) continue; duration = minstrel_get_duration(index); if (duration < fast_rate_dur) type = MINSTREL_SAMPLE_TYPE_JUMP; else type = MINSTREL_SAMPLE_TYPE_SLOW; if (minstrel_ht_find_sample_rate(mi, type, index)) continue; if (type == MINSTREL_SAMPLE_TYPE_JUMP) goto found; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) continue; if (duration >= slow_rate_dur) continue; /* skip slow rates with high success probability */ mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg > MINSTREL_FRAC(95, 100)) continue; slow_rates[(*slow_rate_ofs)++] = index; if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES) max_duration = fast_rate_dur; } index = 0; found: mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group = group; return index; } static void minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi) { u32 prob_dur = minstrel_get_duration(mi->max_prob_rate); u32 tp_dur = minstrel_get_duration(mi->max_tp_rate[0]); u32 tp2_dur = minstrel_get_duration(mi->max_tp_rate[1]); u32 fast_rate_dur = min(min(tp_dur, tp2_dur), prob_dur); u32 slow_rate_dur = max(max(tp_dur, tp2_dur), prob_dur); u16 *rates; int i, j; rates = mi->sample[MINSTREL_SAMPLE_TYPE_INC].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_INC, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_inc_rate(mi, tp_dur); if (!rates[i]) break; i++; } rates = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_rates; i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_JUMP, fast_rate_dur, slow_rate_dur); j = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_SLOW, fast_rate_dur, slow_rate_dur); while (i < MINSTREL_SAMPLE_RATES) { rates[i] = minstrel_ht_next_jump_rate(mi, fast_rate_dur, slow_rate_dur, &j); if (!rates[i]) break; i++; } for (i = 0; i < ARRAY_SIZE(mi->sample); i++) memcpy(mi->sample[i].cur_sample_rates, mi->sample[i].sample_rates, sizeof(mi->sample[i].cur_sample_rates)); } /* * Update rate statistics and select new primary rates * * Rules for rate selection: * - max_prob_rate must use only one stream, as a tradeoff between delivery * probability and throughput during strong fluctuations * - as long as the max prob rate has a probability of more than 75%, pick * higher throughput rates, even if the probability is a bit lower */ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; u16 index; bool ht_supported = mi->sta->deflink.ht_cap.ht_supported; if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); else mi->avg_ampdu_len = 0; mi->ampdu_len = 0; mi->ampdu_packets = 0; } if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else if (mi->supported[MINSTREL_OFDM_GROUP]) group = MINSTREL_OFDM_GROUP; else group = 0; index = MI_RATE(group, 0); for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++) tmp_legacy_tp_rate[j] = index; if (mi->supported[MINSTREL_VHT_GROUP_0]) group = MINSTREL_VHT_GROUP_0; else if (ht_supported) group = MINSTREL_HT_GROUP_0; else if (mi->supported[MINSTREL_CCK_GROUP]) group = MINSTREL_CCK_GROUP; else group = MINSTREL_OFDM_GROUP; index = MI_RATE(group, 0); tmp_max_prob_rate = index; for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { u16 *tp_rate = tmp_mcs_tp_rate; u16 last_prob = 0; mg = &mi->groups[group]; if (!mi->supported[group]) continue; /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) tmp_group_tp_rate[j] = MI_RATE(group, 0); if (group == MINSTREL_CCK_GROUP && ht_supported) tp_rate = tmp_legacy_tp_rate; for (i = MCS_GROUP_RATES - 1; i >= 0; i--) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); mrs = &mg->rates[i]; mrs->retry_updated = false; minstrel_ht_calc_rate_stats(mp, mrs); if (mrs->att_hist) last_prob = max(last_prob, mrs->prob_avg); else mrs->prob_avg = max(last_prob, mrs->prob_avg); cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; /* Find max throughput rate set */ minstrel_ht_sort_best_tp_rates(mi, index, tp_rate); /* Find max throughput rate set within a group */ minstrel_ht_sort_best_tp_rates(mi, index, tmp_group_tp_rate); } memcpy(mg->max_group_tp_rate, tmp_group_tp_rate, sizeof(mg->max_group_tp_rate)); } /* Assign new rate set per sta */ minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_legacy_tp_rate); memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate)); for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { if (!mi->supported[group]) continue; mg = &mi->groups[group]; mg->max_group_prob_rate = MI_RATE(group, 0); for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) continue; index = MI_RATE(group, i); /* Find max probability rate per group and global */ minstrel_ht_set_best_prob_rate(mi, &tmp_max_prob_rate, index); } } mi->max_prob_rate = tmp_max_prob_rate; /* Try to increase robustness of max_prob_rate*/ minstrel_ht_prob_rate_reduce_streams(mi); minstrel_ht_refill_sample_rates(mi); #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ if (mp->fixed_rate_idx != -1) { for (i = 0; i < 4; i++) mi->max_tp_rate[i] = mp->fixed_rate_idx; mi->max_prob_rate = mp->fixed_rate_idx; } #endif /* Reset update timer */ mi->last_stats_update = jiffies; mi->sample_time = jiffies; } static bool minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate) { int i; if (rate->idx < 0) return false; if (!rate->count) return false; if (rate->flags & IEEE80211_TX_RC_MCS || rate->flags & IEEE80211_TX_RC_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) if (rate->idx == mp->cck_rates[i]) return true; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) if (rate->idx == mp->ofdm_rates[mi->band][i]) return true; return false; } /* * Check whether rate_status contains valid information. */ static bool minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_rate_status *rate_status) { int i; if (!rate_status) return false; if (!rate_status->try_count) return false; if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS || rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS) return true; for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_cck_bitrates[ mp->cck_rates[i] ]) return true; } for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) { if (rate_status->rate_idx.legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][i] ]) return true; } return false; } static void minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary) { int group, orig_group; orig_group = group = MI_RATE_GROUP(*idx); while (group > 0) { group--; if (!mi->supported[group]) continue; if (minstrel_mcs_groups[group].streams > minstrel_mcs_groups[orig_group].streams) continue; if (primary) *idx = mi->groups[group].max_group_tp_rate[0]; else *idx = mi->groups[group].max_group_tp_rate[1]; break; } } static void minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, void *priv_sta, struct ieee80211_tx_status *st) { struct ieee80211_tx_info *info = st->info; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2; struct minstrel_priv *mp = priv; u32 update_interval = mp->update_interval; bool last, update = false; int i; /* Ignore packet that was sent with noAck flag */ if (info->flags & IEEE80211_TX_CTL_NO_ACK) return; /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; if (!(info->flags & IEEE80211_TX_STAT_AMPDU)) { info->status.ampdu_ack_len = (info->flags & IEEE80211_TX_STAT_ACK ? 1 : 0); info->status.ampdu_len = 1; } /* wraparound */ if (mi->total_packets >= ~0 - info->status.ampdu_len) { mi->total_packets = 0; mi->sample_packets = 0; } mi->total_packets += info->status.ampdu_len; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; mi->ampdu_packets++; mi->ampdu_len += info->status.ampdu_len; if (st->rates && st->n_rates) { last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0])); for (i = 0; !last; i++) { last = (i == st->n_rates - 1) || !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[i + 1])); rate = minstrel_ht_ri_get_stats(mp, mi, &(st->rates[i])); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += st->rates[i].try_count * info->status.ampdu_len; } } else { last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (last) rate->success += info->status.ampdu_ack_len; rate->attempts += ar[i].count * info->status.ampdu_len; } } if (mp->hw->max_rates > 1) { /* * check for sudden death of spatial multiplexing, * downgrade to a lower number of streams if necessary. */ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); if (rate->attempts > 30 && rate->success < rate->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); update = true; } rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); if (rate2->attempts > 30 && rate2->success < rate2->attempts / 4) { minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; } } if (time_after(jiffies, mi->last_stats_update + update_interval)) { update = true; minstrel_ht_update_stats(mp, mi); } if (update) minstrel_ht_update_rates(mp, mi); } static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) { struct minstrel_rate_stats *mrs; unsigned int tx_time, tx_time_rtscts, tx_time_data; unsigned int cw = mp->cw_min; unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi); unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) { mrs->retry_count = 1; mrs->retry_count_rtscts = 1; return; } mrs->retry_count = 2; mrs->retry_count_rtscts = 2; mrs->retry_updated = true; tx_time_data = minstrel_get_duration(index) * ampdu_len / 1000; /* Contention time for first 2 tries */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index))) { overhead = mi->overhead_legacy; overhead_rtscts = mi->overhead_legacy_rtscts; } else { overhead = mi->overhead; overhead_rtscts = mi->overhead_rtscts; } /* Total TX time for data and Contention after first 2 tries */ tx_time = ctime + 2 * (overhead + tx_time_data); tx_time_rtscts = ctime + 2 * (overhead_rtscts + tx_time_data); /* See how many more tries we can fit inside segment size */ do { /* Contention time for this try */ ctime = (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); /* Total TX time after this try */ tx_time += ctime + overhead + tx_time_data; tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) mrs->retry_count_rtscts++; } while ((tx_time < mp->segment_size) && (++mrs->retry_count < mp->max_retry)); } static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { int group_idx = MI_RATE_GROUP(index); const struct mcs_group *group = &minstrel_mcs_groups[group_idx]; struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; mrs = minstrel_get_ratestats(mi, index); if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; } else { ratetbl->rate[offset].count = mrs->retry_count; ratetbl->rate[offset].count_cts = mrs->retry_count; ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } index = MI_RATE_IDX(index); if (group_idx == MINSTREL_CCK_GROUP) idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; else if (group_idx == MINSTREL_OFDM_GROUP) idx = mp->ofdm_rates[mi->band][index % ARRAY_SIZE(mp->ofdm_rates[0])]; else if (flags & IEEE80211_TX_RC_VHT_MCS) idx = ((group->streams - 1) << 4) | (index & 0xF); else idx = index + (group->streams - 1) * 8; /* enable RTS/CTS if needed: * - if station is in dynamic SMPS (and streams > 1) * - for fallback rates, to increase chances of getting through */ if (offset > 0 || (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; flags |= IEEE80211_TX_RC_USE_RTS_CTS; } ratetbl->rate[offset].idx = idx; ratetbl->rate[offset].flags = flags; } static inline int minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate) { int group = MI_RATE_GROUP(rate); rate = MI_RATE_IDX(rate); return mi->groups[group].rates[rate].prob_avg; } static int minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi) { int group = MI_RATE_GROUP(mi->max_prob_rate); const struct mcs_group *g = &minstrel_mcs_groups[group]; int rate = MI_RATE_IDX(mi->max_prob_rate); unsigned int duration; /* Disable A-MSDU if max_prob_rate is bad */ if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100)) return 1; duration = g->duration[rate]; duration <<= g->shift; /* If the rate is slower than single-stream MCS1, make A-MSDU limit small */ if (duration > MCS_DURATION(1, 0, 52)) return 500; /* * If the rate is slower than single-stream MCS4, limit A-MSDU to usual * data packet size */ if (duration > MCS_DURATION(1, 0, 104)) return 1600; /* * If the rate is slower than single-stream MCS7, or if the max throughput * rate success probability is less than 75%, limit A-MSDU to twice the usual * data packet size */ if (duration > MCS_DURATION(1, 0, 260) || (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) < MINSTREL_FRAC(75, 100))) return 3200; /* * HT A-MPDU limits maximum MPDU size under BA agreement to 4095 bytes. * Since aggregation sessions are started/stopped without txq flush, use * the limit here to avoid the complexity of having to de-aggregate * packets in the queue. */ if (!mi->sta->deflink.vht_cap.vht_supported) return IEEE80211_MAX_MPDU_LEN_HT_BA; /* unlimited */ return 0; } static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; int i = 0; int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE); rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) return; /* Start with max_tp_rate[0] */ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); /* Fill up remaining, keep one entry for max_probe_rate */ for (; i < (max_rates - 1); i++) minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); if (i < max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); if (i < IEEE80211_TX_RATE_TABLE_SIZE) rates->rate[i].idx = -1; mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); ieee80211_sta_recalc_aggregates(mi->sta); rate_control_set_rates(mp->hw, mi->sta, rates); } static u16 minstrel_ht_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { u8 seq; if (mp->hw->max_rates > 1) { seq = mi->sample_seq; mi->sample_seq = (seq + 1) % ARRAY_SIZE(minstrel_sample_seq); seq = minstrel_sample_seq[seq]; } else { seq = MINSTREL_SAMPLE_TYPE_INC; } return __minstrel_ht_get_sample_rate(mi, seq); } static void minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { const struct mcs_group *sample_group; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_tx_rate *rate = &info->status.rates[0]; struct minstrel_ht_sta *mi = priv_sta; struct minstrel_priv *mp = priv; u16 sample_idx; info->flags |= mi->tx_flags; #ifdef CONFIG_MAC80211_DEBUGFS if (mp->fixed_rate_idx != -1) return; #endif /* Don't use EAPOL frames for sampling on non-mrr hw */ if (mp->hw->max_rates == 1 && (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) return; if (time_is_after_jiffies(mi->sample_time)) return; mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL; sample_idx = minstrel_ht_get_sample_rate(mp, mi); if (!sample_idx) return; sample_group = &minstrel_mcs_groups[MI_RATE_GROUP(sample_idx)]; sample_idx = MI_RATE_IDX(sample_idx); if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && (sample_idx >= 4) != txrc->short_preamble) return; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group == &minstrel_mcs_groups[MINSTREL_OFDM_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->ofdm_rates[0]); rate->idx = mp->ofdm_rates[mi->band][idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, MI_RATE_IDX(sample_idx), sample_group->streams); } else { rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; } static void minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { int i; if (sband->band != NL80211_BAND_2GHZ) return; if (sta->deflink.ht_cap.ht_supported && !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; for (i = 0; i < 4; i++) { if (mp->cck_rates[i] == 0xff || !rate_supported(sta, sband->band, mp->cck_rates[i])) continue; mi->supported[MINSTREL_CCK_GROUP] |= BIT(i); if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) mi->supported[MINSTREL_CCK_GROUP] |= BIT(i + 4); } } static void minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) { const u8 *rates; int i; if (sta->deflink.ht_cap.ht_supported) return; rates = mp->ofdm_rates[sband->band]; for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) { if (rates[i] == 0xff || !rate_supported(sta, sband->band, rates[i])) continue; mi->supported[MINSTREL_OFDM_GROUP] |= BIT(i); } } static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { struct minstrel_priv *mp = priv; struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs; u16 ht_cap = sta->deflink.ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; const struct ieee80211_rate *ctl_rate; struct sta_info *sta_info; bool ldpc, erp; int use_vht; int ack_dur; int stbc; int i; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB); if (vht_cap->vht_supported) use_vht = vht_cap->vht_mcs.tx_mcs_map != cpu_to_le16(~0); else use_vht = 0; memset(mi, 0, sizeof(*mi)); mi->sta = sta; mi->band = sband->band; mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1); mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; ack_dur = ieee80211_frame_duration(sband->band, 10, ctl_rate->bitrate, erp, 1); mi->overhead_legacy = ack_dur; mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); if (!use_vht) { stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >> IEEE80211_HT_CAP_RX_STBC_SHIFT; ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING; } else { stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >> IEEE80211_VHT_CAP_RXSTBC_SHIFT; ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC; } mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; if (ldpc) mi->tx_flags |= IEEE80211_TX_CTL_LDPC; for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { u32 gflags = minstrel_mcs_groups[i].flags; int bw, nss; mi->supported[i] = 0; if (minstrel_ht_is_legacy_group(i)) continue; if (gflags & IEEE80211_TX_RC_SHORT_GI) { if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) { if (!(ht_cap & IEEE80211_HT_CAP_SGI_40)) continue; } else { if (!(ht_cap & IEEE80211_HT_CAP_SGI_20)) continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH && sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40) continue; nss = minstrel_mcs_groups[i].streams; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1) continue; /* HT rate */ if (gflags & IEEE80211_TX_RC_MCS) { if (use_vht && minstrel_vht_only) continue; mi->supported[i] = mcs->rx_mask[nss - 1]; continue; } /* VHT rate */ if (!vht_cap->vht_supported || WARN_ON(!(gflags & IEEE80211_TX_RC_VHT_MCS)) || WARN_ON(gflags & IEEE80211_TX_RC_160_MHZ_WIDTH)) continue; if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) { if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 || ((gflags & IEEE80211_TX_RC_SHORT_GI) && !(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) { continue; } } if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) bw = BW_40; else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) bw = BW_80; else bw = BW_20; mi->supported[i] = minstrel_get_valid_vht_rates(bw, nss, vht_cap->vht_mcs.tx_mcs_map); } sta_info = container_of(sta, struct sta_info, sta); mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) && sta_info->sdata->vif.bss_conf.use_short_preamble; minstrel_ht_update_cck(mp, mi, sband, sta); minstrel_ht_update_ofdm(mp, mi, sband, sta); /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); minstrel_ht_update_rates(mp, mi); } static void minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, struct ieee80211_sta *sta, void *priv_sta, u32 changed) { minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta); } static void * minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct ieee80211_supported_band *sband; struct minstrel_ht_sta *mi; struct minstrel_priv *mp = priv; struct ieee80211_hw *hw = mp->hw; int max_rates = 0; int i; for (i = 0; i < NUM_NL80211_BANDS; i++) { sband = hw->wiphy->bands[i]; if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } return kzalloc(sizeof(*mi), gfp); } static void minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) { kfree(priv_sta); } static void minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband, const s16 *bitrates, int n_rates) { int i, j; for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; for (j = 0; j < n_rates; j++) { if (rate->bitrate != bitrates[j]) continue; dest[j] = i; break; } } } static void minstrel_ht_init_cck_rates(struct minstrel_priv *mp) { static const s16 bitrates[4] = { 10, 20, 55, 110 }; struct ieee80211_supported_band *sband; memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates)); sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->cck_rates, sband, minstrel_cck_bitrates, ARRAY_SIZE(minstrel_cck_bitrates)); } static void minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) { static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; struct ieee80211_supported_band *sband; memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band])); sband = mp->hw->wiphy->bands[band]; if (!sband) return; BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates)); minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband, minstrel_ofdm_bitrates, ARRAY_SIZE(minstrel_ofdm_bitrates)); } static void * minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; int i; mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); if (!mp) return NULL; /* contention window settings * Just an approximation. Using the per-queue values would complicate * the calculations and is probably unnecessary */ mp->cw_min = 15; mp->cw_max = 1023; /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; if (hw->max_rate_tries > 0) mp->max_retry = hw->max_rate_tries; else /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; mp->hw = hw; mp->update_interval = HZ / 20; minstrel_ht_init_cck_rates(mp); for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) minstrel_ht_init_ofdm_rates(mp, i); return mp; } #ifdef CONFIG_MAC80211_DEBUGFS static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, struct dentry *debugfsdir) { struct minstrel_priv *mp = priv; mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); } #endif static void minstrel_ht_free(void *priv) { kfree(priv); } static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { struct minstrel_ht_sta *mi = priv_sta; int i, j, prob, tp_avg; i = MI_RATE_GROUP(mi->max_tp_rate[0]); j = MI_RATE_IDX(mi->max_tp_rate[0]); prob = mi->groups[i].rates[j].prob_avg; /* convert tp_avg from pkt per second in kbps */ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10; tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024; return tp_avg; } static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER, .tx_status_ext = minstrel_ht_tx_status, .get_rate = minstrel_ht_get_rate, .rate_init = minstrel_ht_rate_init, .rate_update = minstrel_ht_rate_update, .alloc_sta = minstrel_ht_alloc_sta, .free_sta = minstrel_ht_free_sta, .alloc = minstrel_ht_alloc, .free = minstrel_ht_free, #ifdef CONFIG_MAC80211_DEBUGFS .add_debugfs = minstrel_ht_add_debugfs, .add_sta_debugfs = minstrel_ht_add_sta_debugfs, #endif .get_expected_throughput = minstrel_ht_get_expected_throughput, }; static void __init init_sample_table(void) { int col, i, new_idx; u8 rnd[MCS_GROUP_RATES]; memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { get_random_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { new_idx = (i + rnd[i]) % MCS_GROUP_RATES; while (sample_table[col][new_idx] != 0xff) new_idx = (new_idx + 1) % MCS_GROUP_RATES; sample_table[col][new_idx] = i; } } } int __init rc80211_minstrel_init(void) { init_sample_table(); return ieee80211_rate_control_register(&mac80211_minstrel_ht); } void rc80211_minstrel_exit(void) { ieee80211_rate_control_unregister(&mac80211_minstrel_ht); } |
| 12 22 14 30 19 8 5 22 22 15 15 12 4 7 5 5 4 1 5 5 2008 1940 127 62 7 6 6 16 2 14 14 2 5 5 9 7 7 14 14 21 2 2 17 1 1 6 10 11 3 2 10 6 7 17 1 3 1 2 50 1 1 1 8 1 4 7 4 12 4 8 12 13 5 11 5 1 7 2 4 2 3 5 1 2 1 2 1 4 3 1 3 1 1 1 1 2 2 21 1 1 1 1 1 2 4 1 1 1 1 1 1 1 3 1 1 5 3 3 12 12 6 11 35 1 1 1 31 2 2 32 30 3 19 19 19 19 13 18 1 2 12 6 14 1 14 7 7 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* raw.c - Raw sockets for protocol family CAN * * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/uio.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/dev.h> /* for can_is_canxl_dev_mtu() */ #include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> #include <net/net_namespace.h> MODULE_DESCRIPTION("PF_CAN raw protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>"); MODULE_ALIAS("can-proto-1"); #define RAW_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) #define MASK_ALL 0 /* A raw socket has a list of can_filters attached to it, each receiving * the CAN frames matching that filter. If the filter list is empty, * no CAN frames will be received by the socket. The default after * opening the socket, is to have one filter which receives all frames. * The filter list is allocated dynamically with the exception of the * list containing only one item. This common case is optimized by * storing the single filter in dfilter, to avoid using dynamic memory. */ struct uniqframe { const struct sk_buff *skb; int skbcnt; unsigned int join_rx_count; }; struct raw_sock { struct sock sk; struct net_device *dev; netdevice_tracker dev_tracker; struct list_head notifier; int ifindex; unsigned int bound:1; unsigned int loopback:1; unsigned int recv_own_msgs:1; unsigned int fd_frames:1; unsigned int xl_frames:1; unsigned int join_filters:1; struct can_raw_vcid_options raw_vcid_opts; canid_t tx_vcid_shifted; canid_t rx_vcid_shifted; canid_t rx_vcid_mask_shifted; can_err_mask_t err_mask; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ struct can_filter *filter; /* pointer to filter(s) */ struct uniqframe __percpu *uniq; }; static LIST_HEAD(raw_notifier_list); static DEFINE_SPINLOCK(raw_notifier_lock); static struct raw_sock *raw_busy_notifier; /* Return pointer to store the extra msg flags for raw_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. */ static inline unsigned int *raw_flags(struct sk_buff *skb) { sock_skb_cb_check_size(sizeof(struct sockaddr_can) + sizeof(unsigned int)); /* return pointer after struct sockaddr_can */ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); } static inline struct raw_sock *raw_sk(const struct sock *sk) { return (struct raw_sock *)sk; } static void raw_rcv(struct sk_buff *oskb, void *data) { struct sock *sk = (struct sock *)data; struct raw_sock *ro = raw_sk(sk); enum skb_drop_reason reason; struct sockaddr_can *addr; struct sk_buff *skb; unsigned int *pflags; /* check the received tx sock reference */ if (!ro->recv_own_msgs && oskb->sk == sk) return; /* make sure to not pass oversized frames to the socket */ if (!ro->fd_frames && can_is_canfd_skb(oskb)) return; if (can_is_canxl_skb(oskb)) { struct canxl_frame *cxl = (struct canxl_frame *)oskb->data; /* make sure to not pass oversized frames to the socket */ if (!ro->xl_frames) return; /* filter CAN XL VCID content */ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_RX_FILTER) { /* apply VCID filter if user enabled the filter */ if ((cxl->prio & ro->rx_vcid_mask_shifted) != (ro->rx_vcid_shifted & ro->rx_vcid_mask_shifted)) return; } else { /* no filter => do not forward VCID tagged frames */ if (cxl->prio & CANXL_VCID_MASK) return; } } /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (!ro->join_filters) return; this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) return; } else { this_cpu_ptr(ro->uniq)->skb = oskb; this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) return; } /* clone the given skb to be able to enqueue it into the rcv queue */ skb = skb_clone(oskb, GFP_ATOMIC); if (!skb) return; /* Put the datagram to the queue so that raw_recvmsg() can get * it from there. We need to pass the interface index to * raw_recvmsg(). We pass a whole struct sockaddr_can in * skb->cb containing the interface index. */ sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; /* add CAN specific message flags for raw_recvmsg() */ pflags = raw_flags(skb); *pflags = 0; if (oskb->sk) *pflags |= MSG_DONTROUTE; if (oskb->sk == sk) *pflags |= MSG_CONFIRM; if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) sk_skb_reason_drop(sk, skb, reason); } static int raw_enable_filters(struct net *net, struct net_device *dev, struct sock *sk, struct can_filter *filter, int count) { int err = 0; int i; for (i = 0; i < count; i++) { err = can_rx_register(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); break; } } return err; } static int raw_enable_errfilter(struct net *net, struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { int err = 0; if (err_mask) err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk, "raw", sk); return err; } static void raw_disable_filters(struct net *net, struct net_device *dev, struct sock *sk, struct can_filter *filter, int count) { int i; for (i = 0; i < count; i++) can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); } static inline void raw_disable_errfilter(struct net *net, struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { if (err_mask) can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk); } static inline void raw_disable_allfilters(struct net *net, struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); raw_disable_filters(net, dev, sk, ro->filter, ro->count); raw_disable_errfilter(net, dev, sk, ro->err_mask); } static int raw_enable_allfilters(struct net *net, struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); int err; err = raw_enable_filters(net, dev, sk, ro->filter, ro->count); if (!err) { err = raw_enable_errfilter(net, dev, sk, ro->err_mask); if (err) raw_disable_filters(net, dev, sk, ro->filter, ro->count); } return err; } static void raw_notify(struct raw_sock *ro, unsigned long msg, struct net_device *dev) { struct sock *sk = &ro->sk; if (!net_eq(dev_net(dev), sock_net(sk))) return; if (ro->dev != dev) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) { raw_disable_allfilters(dev_net(dev), dev, sk); netdev_put(dev, &ro->dev_tracker); } if (ro->count > 1) kfree(ro->filter); ro->ifindex = 0; ro->bound = 0; ro->dev = NULL; ro->count = 0; release_sock(sk); sk->sk_err = ENODEV; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); break; case NETDEV_DOWN: sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); break; } } static int raw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) return NOTIFY_DONE; if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */ return NOTIFY_DONE; spin_lock(&raw_notifier_lock); list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) { spin_unlock(&raw_notifier_lock); raw_notify(raw_busy_notifier, msg, dev); spin_lock(&raw_notifier_lock); } raw_busy_notifier = NULL; spin_unlock(&raw_notifier_lock); return NOTIFY_DONE; } static int raw_init(struct sock *sk) { struct raw_sock *ro = raw_sk(sk); ro->bound = 0; ro->ifindex = 0; ro->dev = NULL; /* set default filter to single entry dfilter */ ro->dfilter.can_id = 0; ro->dfilter.can_mask = MASK_ALL; ro->filter = &ro->dfilter; ro->count = 1; /* set default loopback behaviour */ ro->loopback = 1; ro->recv_own_msgs = 0; ro->fd_frames = 0; ro->xl_frames = 0; ro->join_filters = 0; /* alloc_percpu provides zero'ed memory */ ro->uniq = alloc_percpu(struct uniqframe); if (unlikely(!ro->uniq)) return -ENOMEM; /* set notifier */ spin_lock(&raw_notifier_lock); list_add_tail(&ro->notifier, &raw_notifier_list); spin_unlock(&raw_notifier_lock); return 0; } static int raw_release(struct socket *sock) { struct sock *sk = sock->sk; struct raw_sock *ro; struct net *net; if (!sk) return 0; ro = raw_sk(sk); net = sock_net(sk); spin_lock(&raw_notifier_lock); while (raw_busy_notifier == ro) { spin_unlock(&raw_notifier_lock); schedule_timeout_uninterruptible(1); spin_lock(&raw_notifier_lock); } list_del(&ro->notifier); spin_unlock(&raw_notifier_lock); rtnl_lock(); lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) { if (ro->dev) { raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(net, NULL, sk); } } if (ro->count > 1) kfree(ro->filter); ro->ifindex = 0; ro->bound = 0; ro->dev = NULL; ro->count = 0; free_percpu(ro->uniq); sock_orphan(sk); sock->sk = NULL; release_sock(sk); rtnl_unlock(); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_put(sk); return 0; } static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct net_device *dev = NULL; int ifindex; int err = 0; int notify_enetdown = 0; if (len < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; rtnl_lock(); lock_sock(sk); if (ro->bound && addr->can_ifindex == ro->ifindex) goto out; if (addr->can_ifindex) { dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; } if (dev->type != ARPHRD_CAN) { err = -ENODEV; goto out_put_dev; } if (!(dev->flags & IFF_UP)) notify_enetdown = 1; ifindex = dev->ifindex; /* filters set by default/setsockopt */ err = raw_enable_allfilters(sock_net(sk), dev, sk); if (err) goto out_put_dev; } else { ifindex = 0; /* filters set by default/setsockopt */ err = raw_enable_allfilters(sock_net(sk), NULL, sk); } if (!err) { if (ro->bound) { /* unregister old filters */ if (ro->dev) { raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); /* drop reference to old ro->dev */ netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(sock_net(sk), NULL, sk); } } ro->ifindex = ifindex; ro->bound = 1; /* bind() ok -> hold a reference for new ro->dev */ ro->dev = dev; if (ro->dev) netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL); } out_put_dev: /* remove potential reference from dev_get_by_index() */ dev_put(dev); out: release_sock(sk); rtnl_unlock(); if (notify_enetdown) { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } return err; } static int raw_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); if (peer) return -EOPNOTSUPP; memset(addr, 0, RAW_MIN_NAMELEN); addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; return RAW_MIN_NAMELEN; } static int raw_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct can_filter *filter = NULL; /* dyn. alloc'ed filters */ struct can_filter sfilter; /* single filter */ struct net_device *dev = NULL; can_err_mask_t err_mask = 0; int count = 0; int flag; int err = 0; if (level != SOL_CAN_RAW) return -EINVAL; switch (optname) { case CAN_RAW_FILTER: if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; if (optlen > CAN_RAW_FILTER_MAX * sizeof(struct can_filter)) return -EINVAL; count = optlen / sizeof(struct can_filter); if (count > 1) { /* filter does not fit into dfilter => alloc space */ filter = memdup_sockptr(optval, optlen); if (IS_ERR(filter)) return PTR_ERR(filter); } else if (count == 1) { if (copy_from_sockptr(&sfilter, optval, sizeof(sfilter))) return -EFAULT; } rtnl_lock(); lock_sock(sk); dev = ro->dev; if (ro->bound && dev) { if (dev->reg_state != NETREG_REGISTERED) { if (count > 1) kfree(filter); err = -ENODEV; goto out_fil; } } if (ro->bound) { /* (try to) register the new filters */ if (count == 1) err = raw_enable_filters(sock_net(sk), dev, sk, &sfilter, 1); else err = raw_enable_filters(sock_net(sk), dev, sk, filter, count); if (err) { if (count > 1) kfree(filter); goto out_fil; } /* remove old filter registrations */ raw_disable_filters(sock_net(sk), dev, sk, ro->filter, ro->count); } /* remove old filter space */ if (ro->count > 1) kfree(ro->filter); /* link new filters to the socket */ if (count == 1) { /* copy filter data for single filter */ ro->dfilter = sfilter; filter = &ro->dfilter; } ro->filter = filter; ro->count = count; out_fil: release_sock(sk); rtnl_unlock(); break; case CAN_RAW_ERR_FILTER: if (optlen != sizeof(err_mask)) return -EINVAL; if (copy_from_sockptr(&err_mask, optval, optlen)) return -EFAULT; err_mask &= CAN_ERR_MASK; rtnl_lock(); lock_sock(sk); dev = ro->dev; if (ro->bound && dev) { if (dev->reg_state != NETREG_REGISTERED) { err = -ENODEV; goto out_err; } } /* remove current error mask */ if (ro->bound) { /* (try to) register the new err_mask */ err = raw_enable_errfilter(sock_net(sk), dev, sk, err_mask); if (err) goto out_err; /* remove old err_mask registration */ raw_disable_errfilter(sock_net(sk), dev, sk, ro->err_mask); } /* link new err_mask to the socket */ ro->err_mask = err_mask; out_err: release_sock(sk); rtnl_unlock(); break; case CAN_RAW_LOOPBACK: if (optlen != sizeof(flag)) return -EINVAL; if (copy_from_sockptr(&flag, optval, optlen)) return -EFAULT; ro->loopback = !!flag; break; case CAN_RAW_RECV_OWN_MSGS: if (optlen != sizeof(flag)) return -EINVAL; if (copy_from_sockptr(&flag, optval, optlen)) return -EFAULT; ro->recv_own_msgs = !!flag; break; case CAN_RAW_FD_FRAMES: if (optlen != sizeof(flag)) return -EINVAL; if (copy_from_sockptr(&flag, optval, optlen)) return -EFAULT; /* Enabling CAN XL includes CAN FD */ if (ro->xl_frames && !flag) return -EINVAL; ro->fd_frames = !!flag; break; case CAN_RAW_XL_FRAMES: if (optlen != sizeof(flag)) return -EINVAL; if (copy_from_sockptr(&flag, optval, optlen)) return -EFAULT; ro->xl_frames = !!flag; /* Enabling CAN XL includes CAN FD */ if (ro->xl_frames) ro->fd_frames = ro->xl_frames; break; case CAN_RAW_XL_VCID_OPTS: if (optlen != sizeof(ro->raw_vcid_opts)) return -EINVAL; if (copy_from_sockptr(&ro->raw_vcid_opts, optval, optlen)) return -EFAULT; /* prepare 32 bit values for handling in hot path */ ro->tx_vcid_shifted = ro->raw_vcid_opts.tx_vcid << CANXL_VCID_OFFSET; ro->rx_vcid_shifted = ro->raw_vcid_opts.rx_vcid << CANXL_VCID_OFFSET; ro->rx_vcid_mask_shifted = ro->raw_vcid_opts.rx_vcid_mask << CANXL_VCID_OFFSET; break; case CAN_RAW_JOIN_FILTERS: if (optlen != sizeof(flag)) return -EINVAL; if (copy_from_sockptr(&flag, optval, optlen)) return -EFAULT; ro->join_filters = !!flag; break; default: return -ENOPROTOOPT; } return err; } static int raw_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); int flag; int len; void *val; if (level != SOL_CAN_RAW) return -EINVAL; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case CAN_RAW_FILTER: { int err = 0; lock_sock(sk); if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); /* user space buffer to small for filter list? */ if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; if (put_user(fsize, optlen)) err = -EFAULT; } else { if (len > fsize) len = fsize; if (copy_to_user(optval, ro->filter, len)) err = -EFAULT; } } else { len = 0; } release_sock(sk); if (!err) err = put_user(len, optlen); return err; } case CAN_RAW_ERR_FILTER: if (len > sizeof(can_err_mask_t)) len = sizeof(can_err_mask_t); val = &ro->err_mask; break; case CAN_RAW_LOOPBACK: if (len > sizeof(int)) len = sizeof(int); flag = ro->loopback; val = &flag; break; case CAN_RAW_RECV_OWN_MSGS: if (len > sizeof(int)) len = sizeof(int); flag = ro->recv_own_msgs; val = &flag; break; case CAN_RAW_FD_FRAMES: if (len > sizeof(int)) len = sizeof(int); flag = ro->fd_frames; val = &flag; break; case CAN_RAW_XL_FRAMES: if (len > sizeof(int)) len = sizeof(int); flag = ro->xl_frames; val = &flag; break; case CAN_RAW_XL_VCID_OPTS: { int err = 0; /* user space buffer to small for VCID opts? */ if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; if (put_user(sizeof(ro->raw_vcid_opts), optlen)) err = -EFAULT; } else { if (len > sizeof(ro->raw_vcid_opts)) len = sizeof(ro->raw_vcid_opts); if (copy_to_user(optval, &ro->raw_vcid_opts, len)) err = -EFAULT; } if (!err) err = put_user(len, optlen); return err; } case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) len = sizeof(int); flag = ro->join_filters; val = &flag; break; default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, val, len)) return -EFAULT; return 0; } static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb) { struct canxl_frame *cxl = (struct canxl_frame *)skb->data; /* sanitize non CAN XL bits */ cxl->prio &= (CANXL_PRIO_MASK | CANXL_VCID_MASK); /* clear VCID in CAN XL frame if pass through is disabled */ if (!(ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_PASS)) cxl->prio &= CANXL_PRIO_MASK; /* set VCID in CAN XL frame if enabled */ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_SET) { cxl->prio &= CANXL_PRIO_MASK; cxl->prio |= ro->tx_vcid_shifted; } } static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu) { /* Classical CAN -> no checks for flags and device capabilities */ if (can_is_can_skb(skb)) return CAN_MTU; /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */ if (ro->fd_frames && can_is_canfd_skb(skb) && (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu))) return CANFD_MTU; /* CAN XL -> needs to be enabled and a CAN XL device */ if (ro->xl_frames && can_is_canxl_skb(skb) && can_is_canxl_dev_mtu(mtu)) return CANXL_MTU; return 0; } static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); struct sockcm_cookie sockc; struct sk_buff *skb; struct net_device *dev; unsigned int txmtu; int ifindex; int err = -EINVAL; /* check for valid CAN frame sizes */ if (size < CANXL_HDR_SIZE + CANXL_MIN_DLEN || size > CANXL_MTU) return -EINVAL; if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < RAW_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; ifindex = addr->can_ifindex; } else { ifindex = ro->ifindex; } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; /* fill the skb before testing for valid CAN frames */ err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) goto free_skb; err = -EINVAL; /* check for valid CAN (CC/FD/XL) frame content */ txmtu = raw_check_txframe(ro, skb, READ_ONCE(dev->mtu)); if (!txmtu) goto free_skb; /* only CANXL: clear/forward/set VCID value */ if (txmtu == CANXL_MTU) raw_put_canxl_vcid(ro, skb); sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto free_skb; } skb->dev = dev; skb->priority = sockc.priority; skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, &sockc); err = can_send(skb, ro->loopback); dev_put(dev); if (err) goto send_failed; return size; free_skb: kfree_skb(skb); put_dev: dev_put(dev); send_failed: return err; } static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int err = 0; if (flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE); skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; err = memcpy_to_msg(msg, skb->data, size); if (err < 0) { skb_free_datagram(sk, skb); return err; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(RAW_MIN_NAMELEN); msg->msg_namelen = RAW_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } /* assign the flags that have been recorded in raw_rcv() */ msg->msg_flags |= *(raw_flags(skb)); skb_free_datagram(sk, skb); return size; } static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, .bind = raw_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, .ioctl = raw_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, .getsockopt = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, }; static struct proto raw_proto __read_mostly = { .name = "CAN_RAW", .owner = THIS_MODULE, .obj_size = sizeof(struct raw_sock), .init = raw_init, }; static const struct can_proto raw_can_proto = { .type = SOCK_RAW, .protocol = CAN_RAW, .ops = &raw_ops, .prot = &raw_proto, }; static struct notifier_block canraw_notifier = { .notifier_call = raw_notifier }; static __init int raw_module_init(void) { int err; pr_info("can: raw protocol\n"); err = register_netdevice_notifier(&canraw_notifier); if (err) return err; err = can_proto_register(&raw_can_proto); if (err < 0) { pr_err("can: registration of raw protocol failed\n"); goto register_proto_failed; } return 0; register_proto_failed: unregister_netdevice_notifier(&canraw_notifier); return err; } static __exit void raw_module_exit(void) { can_proto_unregister(&raw_can_proto); unregister_netdevice_notifier(&canraw_notifier); } module_init(raw_module_init); module_exit(raw_module_exit); |
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 | // SPDX-License-Identifier: GPL-2.0-or-later // // core.c -- Voltage/Current Regulator framework. // // Copyright 2007, 2008 Wolfson Microelectronics PLC. // Copyright 2008 SlimLogic Ltd. // // Author: Liam Girdwood <lrg@slimlogic.co.uk> #include <linux/kernel.h> #include <linux/init.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/async.h> #include <linux/err.h> #include <linux/mutex.h> #include <linux/suspend.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> #include <linux/of.h> #include <linux/reboot.h> #include <linux/regmap.h> #include <linux/regulator/of_regulator.h> #include <linux/regulator/consumer.h> #include <linux/regulator/coupler.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> #include <linux/module.h> #define CREATE_TRACE_POINTS #include <trace/events/regulator.h> #include "dummy.h" #include "internal.h" #include "regnl.h" static DEFINE_WW_CLASS(regulator_ww_class); static DEFINE_MUTEX(regulator_nesting_mutex); static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_map_list); static LIST_HEAD(regulator_ena_gpio_list); static LIST_HEAD(regulator_supply_alias_list); static LIST_HEAD(regulator_coupler_list); static bool has_full_constraints; static struct dentry *debugfs_root; /* * struct regulator_map * * Used to provide symbolic supply names to devices. */ struct regulator_map { struct list_head list; const char *dev_name; /* The dev_name() for the consumer */ const char *supply; struct regulator_dev *regulator; }; /* * struct regulator_enable_gpio * * Management for shared enable GPIO pin */ struct regulator_enable_gpio { struct list_head list; struct gpio_desc *gpiod; u32 enable_count; /* a number of enabled shared GPIO */ u32 request_count; /* a number of requested shared GPIO */ }; /* * struct regulator_supply_alias * * Used to map lookups for a supply onto an alternative device. */ struct regulator_supply_alias { struct list_head list; struct device *src_dev; const char *src_supply; struct device *alias_dev; const char *alias_supply; }; static int _regulator_is_enabled(struct regulator_dev *rdev); static int _regulator_disable(struct regulator *regulator); static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state); static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name); static void destroy_regulator(struct regulator *regulator); static void _regulator_put(struct regulator *regulator); const char *rdev_get_name(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->name) return rdev->constraints->name; else if (rdev->desc->name) return rdev->desc->name; else return ""; } EXPORT_SYMBOL_GPL(rdev_get_name); static bool have_full_constraints(void) { return has_full_constraints || of_have_populated_dt(); } static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops) { if (!rdev->constraints) { rdev_err(rdev, "no constraints\n"); return false; } if (rdev->constraints->valid_ops_mask & ops) return true; return false; } /** * regulator_lock_nested - lock a single regulator * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function can be called many times by one task on * a single regulator and its mutex will be locked only * once. If a task, which is calling this function is other * than the one, which initially locked the mutex, it will * wait on mutex. * * Return: 0 on success or a negative error number on failure. */ static inline int regulator_lock_nested(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { bool lock = false; int ret = 0; mutex_lock(®ulator_nesting_mutex); if (!ww_mutex_trylock(&rdev->mutex, ww_ctx)) { if (rdev->mutex_owner == current) rdev->ref_cnt++; else lock = true; if (lock) { mutex_unlock(®ulator_nesting_mutex); ret = ww_mutex_lock(&rdev->mutex, ww_ctx); mutex_lock(®ulator_nesting_mutex); } } else { lock = true; } if (lock && ret != -EDEADLK) { rdev->ref_cnt++; rdev->mutex_owner = current; } mutex_unlock(®ulator_nesting_mutex); return ret; } /** * regulator_lock - lock a single regulator * @rdev: regulator source * * This function can be called many times by one task on * a single regulator and its mutex will be locked only * once. If a task, which is calling this function is other * than the one, which initially locked the mutex, it will * wait on mutex. */ static void regulator_lock(struct regulator_dev *rdev) { regulator_lock_nested(rdev, NULL); } /** * regulator_unlock - unlock a single regulator * @rdev: regulator_source * * This function unlocks the mutex when the * reference counter reaches 0. */ static void regulator_unlock(struct regulator_dev *rdev) { mutex_lock(®ulator_nesting_mutex); if (--rdev->ref_cnt == 0) { rdev->mutex_owner = NULL; ww_mutex_unlock(&rdev->mutex); } WARN_ON_ONCE(rdev->ref_cnt < 0); mutex_unlock(®ulator_nesting_mutex); } /** * regulator_lock_two - lock two regulators * @rdev1: first regulator * @rdev2: second regulator * @ww_ctx: w/w mutex acquire context * * Locks both rdevs using the regulator_ww_class. */ static void regulator_lock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *held, *contended; int ret; ww_acquire_init(ww_ctx, ®ulator_ww_class); /* Try to just grab both of them */ ret = regulator_lock_nested(rdev1, ww_ctx); WARN_ON(ret); ret = regulator_lock_nested(rdev2, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); goto exit; } held = rdev1; contended = rdev2; while (true) { regulator_unlock(held); ww_mutex_lock_slow(&contended->mutex, ww_ctx); contended->ref_cnt++; contended->mutex_owner = current; swap(held, contended); ret = regulator_lock_nested(contended, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); break; } } exit: ww_acquire_done(ww_ctx); } /** * regulator_unlock_two - unlock two regulators * @rdev1: first regulator * @rdev2: second regulator * @ww_ctx: w/w mutex acquire context * * The inverse of regulator_lock_two(). */ static void regulator_unlock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { regulator_unlock(rdev2); regulator_unlock(rdev1); ww_acquire_fini(ww_ctx); } static bool regulator_supply_is_couple(struct regulator_dev *rdev) { struct regulator_dev *c_rdev; int i; for (i = 1; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (rdev->supply->rdev == c_rdev) return true; } return false; } static void regulator_unlock_recursive(struct regulator_dev *rdev, unsigned int n_coupled) { struct regulator_dev *c_rdev, *supply_rdev; int i, supply_n_coupled; for (i = n_coupled; i > 0; i--) { c_rdev = rdev->coupling_desc.coupled_rdevs[i - 1]; if (!c_rdev) continue; if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { supply_rdev = c_rdev->supply->rdev; supply_n_coupled = supply_rdev->coupling_desc.n_coupled; regulator_unlock_recursive(supply_rdev, supply_n_coupled); } regulator_unlock(c_rdev); } } static int regulator_lock_recursive(struct regulator_dev *rdev, struct regulator_dev **new_contended_rdev, struct regulator_dev **old_contended_rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *c_rdev; int i, err; for (i = 0; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (!c_rdev) continue; if (c_rdev != *old_contended_rdev) { err = regulator_lock_nested(c_rdev, ww_ctx); if (err) { if (err == -EDEADLK) { *new_contended_rdev = c_rdev; goto err_unlock; } /* shouldn't happen */ WARN_ON_ONCE(err != -EALREADY); } } else { *old_contended_rdev = NULL; } if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { err = regulator_lock_recursive(c_rdev->supply->rdev, new_contended_rdev, old_contended_rdev, ww_ctx); if (err) { regulator_unlock(c_rdev); goto err_unlock; } } } return 0; err_unlock: regulator_unlock_recursive(rdev, i); return err; } /** * regulator_unlock_dependent - unlock regulator's suppliers and coupled * regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * Unlock all regulators related with rdev by coupling or supplying. */ static void regulator_unlock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { regulator_unlock_recursive(rdev, rdev->coupling_desc.n_coupled); ww_acquire_fini(ww_ctx); } /** * regulator_lock_dependent - lock regulator's suppliers and coupled regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function as a wrapper on regulator_lock_recursive(), which locks * all regulators related with rdev by coupling or supplying. */ static void regulator_lock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *new_contended_rdev = NULL; struct regulator_dev *old_contended_rdev = NULL; int err; mutex_lock(®ulator_list_mutex); ww_acquire_init(ww_ctx, ®ulator_ww_class); do { if (new_contended_rdev) { ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); old_contended_rdev = new_contended_rdev; old_contended_rdev->ref_cnt++; old_contended_rdev->mutex_owner = current; } err = regulator_lock_recursive(rdev, &new_contended_rdev, &old_contended_rdev, ww_ctx); if (old_contended_rdev) regulator_unlock(old_contended_rdev); } while (err == -EDEADLK); ww_acquire_done(ww_ctx); mutex_unlock(®ulator_list_mutex); } /* Platform voltage constraint check */ int regulator_check_voltage(struct regulator_dev *rdev, int *min_uV, int *max_uV) { BUG_ON(*min_uV > *max_uV); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { rdev_err(rdev, "voltage operation not allowed\n"); return -EPERM; } if (*max_uV > rdev->constraints->max_uV) *max_uV = rdev->constraints->max_uV; if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; if (*min_uV > *max_uV) { rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* return 0 if the state is valid */ static int regulator_check_states(suspend_state_t state) { return (state > PM_SUSPEND_MAX || state == PM_SUSPEND_TO_IDLE); } /* Make sure we select a voltage that suits the needs of all * regulator consumers */ int regulator_check_consumers(struct regulator_dev *rdev, int *min_uV, int *max_uV, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = ®ulator->voltage[state]; /* * Assume consumers that didn't say anything are OK * with anything in the constraint range. */ if (!voltage->min_uV && !voltage->max_uV) continue; if (*max_uV > voltage->max_uV) *max_uV = voltage->max_uV; if (*min_uV < voltage->min_uV) *min_uV = voltage->min_uV; } if (*min_uV > *max_uV) { rdev_err(rdev, "Restricting voltage, %u-%uuV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* current constraint check */ static int regulator_check_current_limit(struct regulator_dev *rdev, int *min_uA, int *max_uA) { BUG_ON(*min_uA > *max_uA); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_CURRENT)) { rdev_err(rdev, "current operation not allowed\n"); return -EPERM; } if (*max_uA > rdev->constraints->max_uA && rdev->constraints->max_uA) *max_uA = rdev->constraints->max_uA; if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; if (*min_uA > *max_uA) { rdev_err(rdev, "unsupportable current range: %d-%duA\n", *min_uA, *max_uA); return -EINVAL; } return 0; } /* operating mode constraint check */ static int regulator_mode_constrain(struct regulator_dev *rdev, unsigned int *mode) { switch (*mode) { case REGULATOR_MODE_FAST: case REGULATOR_MODE_NORMAL: case REGULATOR_MODE_IDLE: case REGULATOR_MODE_STANDBY: break; default: rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_MODE)) { rdev_err(rdev, "mode operation not allowed\n"); return -EPERM; } /* The modes are bitmasks, the most power hungry modes having * the lowest values. If the requested mode isn't supported * try higher modes. */ while (*mode) { if (rdev->constraints->valid_modes_mask & *mode) return 0; *mode /= 2; } return -EINVAL; } static inline struct regulator_state * regulator_get_suspend_state(struct regulator_dev *rdev, suspend_state_t state) { if (rdev->constraints == NULL) return NULL; switch (state) { case PM_SUSPEND_STANDBY: return &rdev->constraints->state_standby; case PM_SUSPEND_MEM: return &rdev->constraints->state_mem; case PM_SUSPEND_MAX: return &rdev->constraints->state_disk; default: return NULL; } } static const struct regulator_state * regulator_get_suspend_state_check(struct regulator_dev *rdev, suspend_state_t state) { const struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return NULL; /* If we have no suspend mode configuration don't set anything; * only warn if the driver implements set_suspend_voltage or * set_suspend_mode callback. */ if (rstate->enabled != ENABLE_IN_SUSPEND && rstate->enabled != DISABLE_IN_SUSPEND) { if (rdev->desc->ops->set_suspend_voltage || rdev->desc->ops->set_suspend_mode) rdev_warn(rdev, "No configuration\n"); return NULL; } return rstate; } static ssize_t microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int uV; regulator_lock(rdev); uV = regulator_get_voltage_rdev(rdev); regulator_unlock(rdev); if (uV < 0) return uV; return sprintf(buf, "%d\n", uV); } static DEVICE_ATTR_RO(microvolts); static ssize_t microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev)); } static DEVICE_ATTR_RO(microamps); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", rdev_get_name(rdev)); } static DEVICE_ATTR_RO(name); static const char *regulator_opmode_to_str(int mode) { switch (mode) { case REGULATOR_MODE_FAST: return "fast"; case REGULATOR_MODE_NORMAL: return "normal"; case REGULATOR_MODE_IDLE: return "idle"; case REGULATOR_MODE_STANDBY: return "standby"; } return "unknown"; } static ssize_t regulator_print_opmode(char *buf, int mode) { return sprintf(buf, "%s\n", regulator_opmode_to_str(mode)); } static ssize_t opmode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, _regulator_get_mode(rdev)); } static DEVICE_ATTR_RO(opmode); static ssize_t regulator_print_state(char *buf, int state) { if (state > 0) return sprintf(buf, "enabled\n"); else if (state == 0) return sprintf(buf, "disabled\n"); else return sprintf(buf, "unknown\n"); } static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); ssize_t ret; regulator_lock(rdev); ret = regulator_print_state(buf, _regulator_is_enabled(rdev)); regulator_unlock(rdev); return ret; } static DEVICE_ATTR_RO(state); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int status; char *label; status = rdev->desc->ops->get_status(rdev); if (status < 0) return status; switch (status) { case REGULATOR_STATUS_OFF: label = "off"; break; case REGULATOR_STATUS_ON: label = "on"; break; case REGULATOR_STATUS_ERROR: label = "error"; break; case REGULATOR_STATUS_FAST: label = "fast"; break; case REGULATOR_STATUS_NORMAL: label = "normal"; break; case REGULATOR_STATUS_IDLE: label = "idle"; break; case REGULATOR_STATUS_STANDBY: label = "standby"; break; case REGULATOR_STATUS_BYPASS: label = "bypass"; break; case REGULATOR_STATUS_UNDEFINED: label = "undefined"; break; default: return -ERANGE; } return sprintf(buf, "%s\n", label); } static DEVICE_ATTR_RO(status); static ssize_t min_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uA); } static DEVICE_ATTR_RO(min_microamps); static ssize_t max_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->max_uA); } static DEVICE_ATTR_RO(max_microamps); static ssize_t min_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uV); } static DEVICE_ATTR_RO(min_microvolts); static ssize_t max_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->max_uV); } static DEVICE_ATTR_RO(max_microvolts); static ssize_t requested_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); struct regulator *regulator; int uA = 0; regulator_lock(rdev); list_for_each_entry(regulator, &rdev->consumer_list, list) { if (regulator->enable_count) uA += regulator->uA_load; } regulator_unlock(rdev); return sprintf(buf, "%d\n", uA); } static DEVICE_ATTR_RO(requested_microamps); static ssize_t num_users_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->use_count); } static DEVICE_ATTR_RO(num_users); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: return sprintf(buf, "voltage\n"); case REGULATOR_CURRENT: return sprintf(buf, "current\n"); } return sprintf(buf, "unknown\n"); } static DEVICE_ATTR_RO(type); static ssize_t suspend_mem_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV); } static DEVICE_ATTR_RO(suspend_mem_microvolts); static ssize_t suspend_disk_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV); } static DEVICE_ATTR_RO(suspend_disk_microvolts); static ssize_t suspend_standby_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV); } static DEVICE_ATTR_RO(suspend_standby_microvolts); static ssize_t suspend_mem_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_mem.mode); } static DEVICE_ATTR_RO(suspend_mem_mode); static ssize_t suspend_disk_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_disk.mode); } static DEVICE_ATTR_RO(suspend_disk_mode); static ssize_t suspend_standby_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_standby.mode); } static DEVICE_ATTR_RO(suspend_standby_mode); static ssize_t suspend_mem_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_mem.enabled); } static DEVICE_ATTR_RO(suspend_mem_state); static ssize_t suspend_disk_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_disk.enabled); } static DEVICE_ATTR_RO(suspend_disk_state); static ssize_t suspend_standby_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_standby.enabled); } static DEVICE_ATTR_RO(suspend_standby_state); static ssize_t bypass_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); const char *report; bool bypass; int ret; ret = rdev->desc->ops->get_bypass(rdev, &bypass); if (ret != 0) report = "unknown"; else if (bypass) report = "enabled"; else report = "disabled"; return sprintf(buf, "%s\n", report); } static DEVICE_ATTR_RO(bypass); static ssize_t power_budget_milliwatt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->pw_budget_mW); } static DEVICE_ATTR_RO(power_budget_milliwatt); static ssize_t power_requested_milliwatt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->pw_requested_mW); } static DEVICE_ATTR_RO(power_requested_milliwatt); #define REGULATOR_ERROR_ATTR(name, bit) \ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ int ret; \ unsigned int flags; \ struct regulator_dev *rdev = dev_get_drvdata(dev); \ ret = _regulator_get_error_flags(rdev, &flags); \ if (ret) \ return ret; \ return sysfs_emit(buf, "%d\n", !!(flags & (bit))); \ } \ static DEVICE_ATTR_RO(name) REGULATOR_ERROR_ATTR(under_voltage, REGULATOR_ERROR_UNDER_VOLTAGE); REGULATOR_ERROR_ATTR(over_current, REGULATOR_ERROR_OVER_CURRENT); REGULATOR_ERROR_ATTR(regulation_out, REGULATOR_ERROR_REGULATION_OUT); REGULATOR_ERROR_ATTR(fail, REGULATOR_ERROR_FAIL); REGULATOR_ERROR_ATTR(over_temp, REGULATOR_ERROR_OVER_TEMP); REGULATOR_ERROR_ATTR(under_voltage_warn, REGULATOR_ERROR_UNDER_VOLTAGE_WARN); REGULATOR_ERROR_ATTR(over_current_warn, REGULATOR_ERROR_OVER_CURRENT_WARN); REGULATOR_ERROR_ATTR(over_voltage_warn, REGULATOR_ERROR_OVER_VOLTAGE_WARN); REGULATOR_ERROR_ATTR(over_temp_warn, REGULATOR_ERROR_OVER_TEMP_WARN); /* Calculate the new optimum regulator operating mode based on the new total * consumer load. All locks held by caller */ static int drms_uA_update(struct regulator_dev *rdev) { struct regulator *sibling; int current_uA = 0, output_uV, input_uV, err; unsigned int mode; /* * first check to see if we can set modes at all, otherwise just * tell the consumer everything is OK. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) { rdev_dbg(rdev, "DRMS operation not allowed\n"); return 0; } if (!rdev->desc->ops->get_optimum_mode && !rdev->desc->ops->set_load) return 0; if (!rdev->desc->ops->set_mode && !rdev->desc->ops->set_load) return -EINVAL; /* calc total requested load */ list_for_each_entry(sibling, &rdev->consumer_list, list) { if (sibling->enable_count) current_uA += sibling->uA_load; } current_uA += rdev->constraints->system_load; if (rdev->desc->ops->set_load) { /* set the optimum mode for our new total regulator load */ err = rdev->desc->ops->set_load(rdev, current_uA); if (err < 0) rdev_err(rdev, "failed to set load %d: %pe\n", current_uA, ERR_PTR(err)); } else { /* * Unfortunately in some cases the constraints->valid_ops has * REGULATOR_CHANGE_DRMS but there are no valid modes listed. * That's not really legit but we won't consider it a fatal * error here. We'll treat it as if REGULATOR_CHANGE_DRMS * wasn't set. */ if (!rdev->constraints->valid_modes_mask) { rdev_dbg(rdev, "Can change modes; but no valid mode\n"); return 0; } /* get output voltage */ output_uV = regulator_get_voltage_rdev(rdev); /* * Don't return an error; if regulator driver cares about * output_uV then it's up to the driver to validate. */ if (output_uV <= 0) rdev_dbg(rdev, "invalid output voltage found\n"); /* get input voltage */ input_uV = 0; if (rdev->supply) input_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; /* * Don't return an error; if regulator driver cares about * input_uV then it's up to the driver to validate. */ if (input_uV <= 0) rdev_dbg(rdev, "invalid input voltage found\n"); /* now get the optimum mode for our new total regulator load */ mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV, output_uV, current_uA); /* check the new mode is allowed */ err = regulator_mode_constrain(rdev, &mode); if (err < 0) { rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV: %pe\n", current_uA, input_uV, output_uV, ERR_PTR(err)); return err; } err = rdev->desc->ops->set_mode(rdev, mode); if (err < 0) rdev_err(rdev, "failed to set optimum mode %x: %pe\n", mode, ERR_PTR(err)); } return err; } static int __suspend_set_state(struct regulator_dev *rdev, const struct regulator_state *rstate) { int ret = 0; if (rstate->enabled == ENABLE_IN_SUSPEND && rdev->desc->ops->set_suspend_enable) ret = rdev->desc->ops->set_suspend_enable(rdev); else if (rstate->enabled == DISABLE_IN_SUSPEND && rdev->desc->ops->set_suspend_disable) ret = rdev->desc->ops->set_suspend_disable(rdev); else /* OK if set_suspend_enable or set_suspend_disable is NULL */ ret = 0; if (ret < 0) { rdev_err(rdev, "failed to enabled/disable: %pe\n", ERR_PTR(ret)); return ret; } if (rdev->desc->ops->set_suspend_voltage && rstate->uV > 0) { ret = rdev->desc->ops->set_suspend_voltage(rdev, rstate->uV); if (ret < 0) { rdev_err(rdev, "failed to set voltage: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->desc->ops->set_suspend_mode && rstate->mode > 0) { ret = rdev->desc->ops->set_suspend_mode(rdev, rstate->mode); if (ret < 0) { rdev_err(rdev, "failed to set mode: %pe\n", ERR_PTR(ret)); return ret; } } return ret; } static int suspend_set_initial_state(struct regulator_dev *rdev) { const struct regulator_state *rstate; rstate = regulator_get_suspend_state_check(rdev, rdev->constraints->initial_state); if (!rstate) return 0; return __suspend_set_state(rdev, rstate); } #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) static void print_constraints_debug(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; char buf[160] = ""; size_t len = sizeof(buf) - 1; int count = 0; int ret; if (constraints->min_uV && constraints->max_uV) { if (constraints->min_uV == constraints->max_uV) count += scnprintf(buf + count, len - count, "%d mV ", constraints->min_uV / 1000); else count += scnprintf(buf + count, len - count, "%d <--> %d mV ", constraints->min_uV / 1000, constraints->max_uV / 1000); } if (!constraints->min_uV || constraints->min_uV != constraints->max_uV) { ret = regulator_get_voltage_rdev(rdev); if (ret > 0) count += scnprintf(buf + count, len - count, "at %d mV ", ret / 1000); } if (constraints->uV_offset) count += scnprintf(buf + count, len - count, "%dmV offset ", constraints->uV_offset / 1000); if (constraints->min_uA && constraints->max_uA) { if (constraints->min_uA == constraints->max_uA) count += scnprintf(buf + count, len - count, "%d mA ", constraints->min_uA / 1000); else count += scnprintf(buf + count, len - count, "%d <--> %d mA ", constraints->min_uA / 1000, constraints->max_uA / 1000); } if (!constraints->min_uA || constraints->min_uA != constraints->max_uA) { ret = _regulator_get_current_limit(rdev); if (ret > 0) count += scnprintf(buf + count, len - count, "at %d mA ", ret / 1000); } if (constraints->valid_modes_mask & REGULATOR_MODE_FAST) count += scnprintf(buf + count, len - count, "fast "); if (constraints->valid_modes_mask & REGULATOR_MODE_NORMAL) count += scnprintf(buf + count, len - count, "normal "); if (constraints->valid_modes_mask & REGULATOR_MODE_IDLE) count += scnprintf(buf + count, len - count, "idle "); if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY) count += scnprintf(buf + count, len - count, "standby "); if (constraints->pw_budget_mW) count += scnprintf(buf + count, len - count, "%d mW budget", constraints->pw_budget_mW); if (!count) count = scnprintf(buf, len, "no parameters"); else --count; count += scnprintf(buf + count, len - count, ", %s", _regulator_is_enabled(rdev) ? "enabled" : "disabled"); rdev_dbg(rdev, "%s\n", buf); } #else /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static inline void print_constraints_debug(struct regulator_dev *rdev) {} #endif /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; print_constraints_debug(rdev); if ((constraints->min_uV != constraints->max_uV) && !regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) rdev_warn(rdev, "Voltage range but no REGULATOR_CHANGE_VOLTAGE\n"); } static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ if (rdev->constraints->apply_uV && rdev->constraints->min_uV && rdev->constraints->max_uV) { int target_min, target_max; int current_uV = regulator_get_voltage_rdev(rdev); if (current_uV == -ENOTRECOVERABLE) { /* This regulator can't be read and must be initialized */ rdev_info(rdev, "Setting %d-%duV\n", rdev->constraints->min_uV, rdev->constraints->max_uV); _regulator_do_set_voltage(rdev, rdev->constraints->min_uV, rdev->constraints->max_uV); current_uV = regulator_get_voltage_rdev(rdev); } if (current_uV < 0) { if (current_uV != -EPROBE_DEFER) rdev_err(rdev, "failed to get the current voltage: %pe\n", ERR_PTR(current_uV)); return current_uV; } /* * If we're below the minimum voltage move up to the * minimum voltage, if we're above the maximum voltage * then move down to the maximum. */ target_min = current_uV; target_max = current_uV; if (current_uV < rdev->constraints->min_uV) { target_min = rdev->constraints->min_uV; target_max = rdev->constraints->min_uV; } if (current_uV > rdev->constraints->max_uV) { target_min = rdev->constraints->max_uV; target_max = rdev->constraints->max_uV; } if (target_min != current_uV || target_max != current_uV) { rdev_info(rdev, "Bringing %duV into %d-%duV\n", current_uV, target_min, target_max); ret = _regulator_do_set_voltage( rdev, target_min, target_max); if (ret < 0) { rdev_err(rdev, "failed to apply %d-%duV constraint: %pe\n", target_min, target_max, ERR_PTR(ret)); return ret; } } } /* constrain machine-level voltage specs to fit * the actual range supported by this regulator. */ if (ops->list_voltage && rdev->desc->n_voltages) { int count = rdev->desc->n_voltages; int i; int min_uV = INT_MAX; int max_uV = INT_MIN; int cmin = constraints->min_uV; int cmax = constraints->max_uV; /* it's safe to autoconfigure fixed-voltage supplies * and the constraints are used by list_voltage. */ if (count == 1 && !cmin) { cmin = 1; cmax = INT_MAX; constraints->min_uV = cmin; constraints->max_uV = cmax; } /* voltage constraints are optional */ if ((cmin == 0) && (cmax == 0)) return 0; /* else require explicit machine-level constraints */ if (cmin <= 0 || cmax <= 0 || cmax < cmin) { rdev_err(rdev, "invalid voltage constraints\n"); return -EINVAL; } /* no need to loop voltages if range is continuous */ if (rdev->desc->continuous_voltage_range) return 0; /* initial: [cmin..cmax] valid, [min_uV..max_uV] not */ for (i = 0; i < count; i++) { int value; value = ops->list_voltage(rdev, i); if (value <= 0) continue; /* maybe adjust [min_uV..max_uV] */ if (value >= cmin && value < min_uV) min_uV = value; if (value <= cmax && value > max_uV) max_uV = value; } /* final: [min_uV..max_uV] valid iff constraints valid */ if (max_uV < min_uV) { rdev_err(rdev, "unsupportable voltage constraints %u-%uuV\n", min_uV, max_uV); return -EINVAL; } /* use regulator's subset of machine constraints */ if (constraints->min_uV < min_uV) { rdev_dbg(rdev, "override min_uV, %d -> %d\n", constraints->min_uV, min_uV); constraints->min_uV = min_uV; } if (constraints->max_uV > max_uV) { rdev_dbg(rdev, "override max_uV, %d -> %d\n", constraints->max_uV, max_uV); constraints->max_uV = max_uV; } } return 0; } static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) return 0; if (constraints->min_uA > constraints->max_uA) { rdev_err(rdev, "Invalid current constraints\n"); return -EINVAL; } if (!ops->set_current_limit || !ops->get_current_limit) { rdev_warn(rdev, "Operation of current configuration missing\n"); return 0; } /* Set regulator current in constraints range */ ret = ops->set_current_limit(rdev, constraints->min_uA, constraints->max_uA); if (ret < 0) { rdev_err(rdev, "Failed to set current constraint, %d\n", ret); return ret; } return 0; } static int _regulator_do_enable(struct regulator_dev *rdev); static int notif_set_limit(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), int limit, int severity) { bool enable; if (limit == REGULATOR_NOTIF_LIMIT_DISABLE) { enable = false; limit = 0; } else { enable = true; } if (limit == REGULATOR_NOTIF_LIMIT_ENABLE) limit = 0; return set(rdev, limit, severity, enable); } static int handle_notify_limits(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), struct notification_limit *limits) { int ret = 0; if (!set) return -EOPNOTSUPP; if (limits->prot) ret = notif_set_limit(rdev, set, limits->prot, REGULATOR_SEVERITY_PROT); if (ret) return ret; if (limits->err) ret = notif_set_limit(rdev, set, limits->err, REGULATOR_SEVERITY_ERR); if (ret) return ret; if (limits->warn) ret = notif_set_limit(rdev, set, limits->warn, REGULATOR_SEVERITY_WARN); return ret; } /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. NOTE: * Constraints *must* be set by platform code in order for some * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. * * Return: 0 on success or a negative error number on failure. */ static int set_machine_constraints(struct regulator_dev *rdev) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; ret = machine_constraints_current(rdev, rdev->constraints); if (ret != 0) return ret; if (rdev->constraints->ilim_uA && ops->set_input_current_limit) { ret = ops->set_input_current_limit(rdev, rdev->constraints->ilim_uA); if (ret < 0) { rdev_err(rdev, "failed to set input limit: %pe\n", ERR_PTR(ret)); return ret; } } /* do we need to setup our suspend state */ if (rdev->constraints->initial_state) { ret = suspend_set_initial_state(rdev); if (ret < 0) { rdev_err(rdev, "failed to set suspend state: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->initial_mode) { if (!ops->set_mode) { rdev_err(rdev, "no set_mode operation\n"); return -EINVAL; } ret = ops->set_mode(rdev, rdev->constraints->initial_mode); if (ret < 0) { rdev_err(rdev, "failed to set initial mode: %pe\n", ERR_PTR(ret)); return ret; } } else if (rdev->constraints->system_load) { /* * We'll only apply the initial system load if an * initial mode wasn't specified. */ drms_uA_update(rdev); } if ((rdev->constraints->ramp_delay || rdev->constraints->ramp_disable) && ops->set_ramp_delay) { ret = ops->set_ramp_delay(rdev, rdev->constraints->ramp_delay); if (ret < 0) { rdev_err(rdev, "failed to set ramp_delay: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->pull_down && ops->set_pull_down) { ret = ops->set_pull_down(rdev); if (ret < 0) { rdev_err(rdev, "failed to set pull down: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->soft_start && ops->set_soft_start) { ret = ops->set_soft_start(rdev); if (ret < 0) { rdev_err(rdev, "failed to set soft start: %pe\n", ERR_PTR(ret)); return ret; } } /* * Existing logic does not warn if over_current_protection is given as * a constraint but driver does not support that. I think we should * warn about this type of issues as it is possible someone changes * PMIC on board to another type - and the another PMIC's driver does * not support setting protection. Board composer may happily believe * the DT limits are respected - especially if the new PMIC HW also * supports protection but the driver does not. I won't change the logic * without hearing more experienced opinion on this though. * * If warning is seen as a good idea then we can merge handling the * over-curret protection and detection and get rid of this special * handling. */ if (rdev->constraints->over_current_protection && ops->set_over_current_protection) { int lim = rdev->constraints->over_curr_limits.prot; ret = ops->set_over_current_protection(rdev, lim, REGULATOR_SEVERITY_PROT, true); if (ret < 0) { rdev_err(rdev, "failed to set over current protection: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->over_current_detection) ret = handle_notify_limits(rdev, ops->set_over_current_protection, &rdev->constraints->over_curr_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over current limits: %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over-current limits\n"); } if (rdev->constraints->over_voltage_detection) ret = handle_notify_limits(rdev, ops->set_over_voltage_protection, &rdev->constraints->over_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over voltage limits\n"); } if (rdev->constraints->under_voltage_detection) ret = handle_notify_limits(rdev, ops->set_under_voltage_protection, &rdev->constraints->under_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set under voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested under voltage limits\n"); } if (rdev->constraints->over_temp_detection) ret = handle_notify_limits(rdev, ops->set_thermal_protection, &rdev->constraints->temp_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set temperature limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested temperature limits\n"); } if (rdev->constraints->active_discharge && ops->set_active_discharge) { bool ad_state = rdev->constraints->active_discharge == REGULATOR_ACTIVE_DISCHARGE_ENABLE; ret = ops->set_active_discharge(rdev, ad_state); if (ret < 0) { rdev_err(rdev, "failed to set active discharge: %pe\n", ERR_PTR(ret)); return ret; } } /* * If there is no mechanism for controlling the regulator then * flag it as always_on so we don't end up duplicating checks * for this so much. Note that we could control the state of * a supply to control the output on a regulator that has no * direct control. */ if (!rdev->ena_pin && !ops->enable) { if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; if (rdev->supply) rdev->constraints->always_on = rdev->supply->rdev->constraints->always_on; else rdev->constraints->always_on = true; } /* If the constraints say the regulator should be on at this point * and we have control then make sure it is enabled. */ if (rdev->constraints->always_on || rdev->constraints->boot_on) { /* If we want to enable this regulator, make sure that we know * the supplying regulator. */ if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; /* If supplying regulator has already been enabled, * it's not intended to have use_count increment * when rdev is only boot-on. */ if (rdev->supply && (rdev->constraints->always_on || !regulator_is_enabled(rdev->supply))) { ret = regulator_enable(rdev->supply); if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; return ret; } } ret = _regulator_do_enable(rdev); if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable: %pe\n", ERR_PTR(ret)); return ret; } if (rdev->constraints->always_on) rdev->use_count++; } else if (rdev->desc->off_on_delay) { rdev->last_off = ktime_get(); } if (!rdev->constraints->pw_budget_mW) rdev->constraints->pw_budget_mW = INT_MAX; print_constraints(rdev); return 0; } /** * set_supply - set regulator supply regulator * @rdev: regulator (locked) * @supply_rdev: supply regulator (locked)) * * Called by platform initialisation code to set the supply regulator for this * regulator. This ensures that a regulators supply will also be enabled by the * core if it's child is enabled. * * Return: 0 on success or a negative error number on failure. */ static int set_supply(struct regulator_dev *rdev, struct regulator_dev *supply_rdev) { int err; rdev_dbg(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); if (!try_module_get(supply_rdev->owner)) return -ENODEV; rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); if (rdev->supply == NULL) { module_put(supply_rdev->owner); err = -ENOMEM; return err; } supply_rdev->open_count++; return 0; } /** * set_consumer_device_supply - Bind a regulator to a symbolic supply * @rdev: regulator source * @consumer_dev_name: dev_name() string for device supply applies to * @supply: symbolic name for supply * * Allows platform initialisation code to map physical regulator * sources to symbolic names for supplies for use by devices. Devices * should use these symbolic names to request regulators, avoiding the * need to provide board-specific regulator names as platform data. * * Return: 0 on success or a negative error number on failure. */ static int set_consumer_device_supply(struct regulator_dev *rdev, const char *consumer_dev_name, const char *supply) { struct regulator_map *node, *new_node; int has_dev; if (supply == NULL) return -EINVAL; if (consumer_dev_name != NULL) has_dev = 1; else has_dev = 0; new_node = kzalloc(sizeof(struct regulator_map), GFP_KERNEL); if (new_node == NULL) return -ENOMEM; new_node->regulator = rdev; new_node->supply = supply; if (has_dev) { new_node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL); if (new_node->dev_name == NULL) { kfree(new_node); return -ENOMEM; } } mutex_lock(®ulator_list_mutex); list_for_each_entry(node, ®ulator_map_list, list) { if (node->dev_name && consumer_dev_name) { if (strcmp(node->dev_name, consumer_dev_name) != 0) continue; } else if (node->dev_name || consumer_dev_name) { continue; } if (strcmp(node->supply, supply) != 0) continue; pr_debug("%s: %s/%s is '%s' supply; fail %s/%s\n", consumer_dev_name, dev_name(&node->regulator->dev), node->regulator->desc->name, supply, dev_name(&rdev->dev), rdev_get_name(rdev)); goto fail; } list_add(&new_node->list, ®ulator_map_list); mutex_unlock(®ulator_list_mutex); return 0; fail: mutex_unlock(®ulator_list_mutex); kfree(new_node->dev_name); kfree(new_node); return -EBUSY; } static void unset_regulator_supplies(struct regulator_dev *rdev) { struct regulator_map *node, *n; list_for_each_entry_safe(node, n, ®ulator_map_list, list) { if (rdev == node->regulator) { list_del(&node->list); kfree(node->dev_name); kfree(node); } } } #ifdef CONFIG_DEBUG_FS static ssize_t constraint_flags_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { const struct regulator *regulator = file->private_data; const struct regulation_constraints *c = regulator->rdev->constraints; char *buf; ssize_t ret; if (!c) return 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; ret = snprintf(buf, PAGE_SIZE, "always_on: %u\n" "boot_on: %u\n" "apply_uV: %u\n" "ramp_disable: %u\n" "soft_start: %u\n" "pull_down: %u\n" "over_current_protection: %u\n", c->always_on, c->boot_on, c->apply_uV, c->ramp_disable, c->soft_start, c->pull_down, c->over_current_protection); ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); return ret; } #endif static const struct file_operations constraint_flags_fops = { #ifdef CONFIG_DEBUG_FS .open = simple_open, .read = constraint_flags_read_file, .llseek = default_llseek, #endif }; #define REG_STR_SIZE 64 static void link_and_create_debugfs(struct regulator *regulator, struct regulator_dev *rdev, struct device *dev) { int err = 0; if (dev) { regulator->dev = dev; /* Add a link to the device sysfs entry */ err = sysfs_create_link_nowarn(&rdev->dev.kobj, &dev->kobj, regulator->supply_name); if (err) { rdev_dbg(rdev, "could not add device link %s: %pe\n", dev->kobj.name, ERR_PTR(err)); /* non-fatal */ } } if (err != -EEXIST) { regulator->debugfs = debugfs_create_dir(regulator->supply_name, rdev->debugfs); if (IS_ERR(regulator->debugfs)) { rdev_dbg(rdev, "Failed to create debugfs directory\n"); regulator->debugfs = NULL; } } if (regulator->debugfs) { debugfs_create_u32("uA_load", 0444, regulator->debugfs, ®ulator->uA_load); debugfs_create_u32("min_uV", 0444, regulator->debugfs, ®ulator->voltage[PM_SUSPEND_ON].min_uV); debugfs_create_u32("max_uV", 0444, regulator->debugfs, ®ulator->voltage[PM_SUSPEND_ON].max_uV); debugfs_create_file("constraint_flags", 0444, regulator->debugfs, regulator, &constraint_flags_fops); } } static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name) { struct regulator *regulator; lockdep_assert_held_once(&rdev->mutex.base); if (dev) { char buf[REG_STR_SIZE]; int size; size = snprintf(buf, REG_STR_SIZE, "%s-%s", dev->kobj.name, supply_name); if (size >= REG_STR_SIZE) return NULL; supply_name = kstrdup(buf, GFP_KERNEL); if (supply_name == NULL) return NULL; } else { supply_name = kstrdup_const(supply_name, GFP_KERNEL); if (supply_name == NULL) return NULL; } regulator = kzalloc(sizeof(*regulator), GFP_KERNEL); if (regulator == NULL) { kfree_const(supply_name); return NULL; } regulator->rdev = rdev; regulator->supply_name = supply_name; list_add(®ulator->list, &rdev->consumer_list); /* * Check now if the regulator is an always on regulator - if * it is then we don't need to do nearly so much work for * enable/disable calls. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS) && _regulator_is_enabled(rdev)) regulator->always_on = true; return regulator; } static int _regulator_get_enable_time(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->enable_time) return rdev->constraints->enable_time; if (rdev->desc->ops->enable_time) return rdev->desc->ops->enable_time(rdev); return rdev->desc->enable_time; } static struct regulator_supply_alias *regulator_find_supply_alias( struct device *dev, const char *supply) { struct regulator_supply_alias *map; list_for_each_entry(map, ®ulator_supply_alias_list, list) if (map->src_dev == dev && strcmp(map->src_supply, supply) == 0) return map; return NULL; } static void regulator_supply_alias(struct device **dev, const char **supply) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(*dev, *supply); if (map) { dev_dbg(*dev, "Mapping supply %s to %s,%s\n", *supply, map->alias_supply, dev_name(map->alias_dev)); *dev = map->alias_dev; *supply = map->alias_supply; } } static int regulator_match(struct device *dev, const void *data) { struct regulator_dev *r = dev_to_rdev(dev); return strcmp(rdev_get_name(r), data) == 0; } static struct regulator_dev *regulator_lookup_by_name(const char *name) { struct device *dev; dev = class_find_device(®ulator_class, NULL, name, regulator_match); return dev ? dev_to_rdev(dev) : NULL; } static struct regulator_dev *regulator_dt_lookup(struct device *dev, const char *supply) { struct regulator_dev *r = NULL; if (dev_of_node(dev)) { r = of_regulator_dev_lookup(dev, dev_of_node(dev), supply); if (PTR_ERR(r) == -ENODEV) r = NULL; } return r; } /** * regulator_dev_lookup - lookup a regulator device. * @dev: device for regulator "consumer". * @supply: Supply name or regulator ID. * * Return: pointer to &struct regulator_dev or ERR_PTR() encoded negative error number. * * If successful, returns a struct regulator_dev that corresponds to the name * @supply and with the embedded struct device refcount incremented by one. * The refcount must be dropped by calling put_device(). * On failure one of the following ERR_PTR() encoded values is returned: * -%ENODEV if lookup fails permanently, -%EPROBE_DEFER if lookup could succeed * in the future. */ static struct regulator_dev *regulator_dev_lookup(struct device *dev, const char *supply) { struct regulator_dev *r = NULL; struct regulator_map *map; const char *devname = NULL; regulator_supply_alias(&dev, &supply); /* first do a dt based lookup */ r = regulator_dt_lookup(dev, supply); if (r) return r; /* if not found, try doing it non-dt way */ if (dev) devname = dev_name(dev); mutex_lock(®ulator_list_mutex); list_for_each_entry(map, ®ulator_map_list, list) { /* If the mapping has a device set up it must match */ if (map->dev_name && (!devname || strcmp(map->dev_name, devname))) continue; if (strcmp(map->supply, supply) == 0 && get_device(&map->regulator->dev)) { r = map->regulator; break; } } mutex_unlock(®ulator_list_mutex); if (r) return r; r = regulator_lookup_by_name(supply); if (r) return r; return ERR_PTR(-ENODEV); } static int regulator_resolve_supply(struct regulator_dev *rdev) { struct regulator_dev *r; struct device *dev = rdev->dev.parent; struct ww_acquire_ctx ww_ctx; int ret = 0; /* No supply to resolve? */ if (!rdev->supply_name) return 0; /* Supply already resolved? (fast-path without locking contention) */ if (rdev->supply) return 0; /* first do a dt based lookup on the node described in the virtual * device. */ r = regulator_dt_lookup(&rdev->dev, rdev->supply_name); /* If regulator not found use usual search path in the parent * device. */ if (!r) r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); /* Did the lookup explicitly defer for us? */ if (ret == -EPROBE_DEFER) goto out; if (have_full_constraints()) { r = dummy_regulator_rdev; if (!r) { ret = -EPROBE_DEFER; goto out; } get_device(&r->dev); } else { dev_err(dev, "Failed to resolve %s-supply for %s\n", rdev->supply_name, rdev->desc->name); ret = -EPROBE_DEFER; goto out; } } if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); if (!have_full_constraints()) { ret = -EINVAL; goto out; } r = dummy_regulator_rdev; if (!r) { ret = -EPROBE_DEFER; goto out; } get_device(&r->dev); } /* * If the supply's parent device is not the same as the * regulator's parent device, then ensure the parent device * is bound before we resolve the supply, in case the parent * device get probe deferred and unregisters the supply. */ if (r->dev.parent && r->dev.parent != rdev->dev.parent) { if (!device_is_bound(r->dev.parent)) { put_device(&r->dev); ret = -EPROBE_DEFER; goto out; } } /* Recursively resolve the supply of the supply */ ret = regulator_resolve_supply(r); if (ret < 0) { put_device(&r->dev); goto out; } /* * Recheck rdev->supply with rdev->mutex lock held to avoid a race * between rdev->supply null check and setting rdev->supply in * set_supply() from concurrent tasks. */ regulator_lock_two(rdev, r, &ww_ctx); /* Supply just resolved by a concurrent task? */ if (rdev->supply) { regulator_unlock_two(rdev, r, &ww_ctx); put_device(&r->dev); goto out; } ret = set_supply(rdev, r); if (ret < 0) { regulator_unlock_two(rdev, r, &ww_ctx); put_device(&r->dev); goto out; } regulator_unlock_two(rdev, r, &ww_ctx); /* rdev->supply was created in set_supply() */ link_and_create_debugfs(rdev->supply, r, &rdev->dev); /* * In set_machine_constraints() we may have turned this regulator on * but we couldn't propagate to the supply if it hadn't been resolved * yet. Do it now. */ if (rdev->use_count) { ret = regulator_enable(rdev->supply); if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; goto out; } } out: return ret; } /* common pre-checks for regulator requests */ int _regulator_get_common_check(struct device *dev, const char *id, enum regulator_get_type get_type) { if (get_type >= MAX_GET_TYPE) { dev_err(dev, "invalid type %d in %s\n", get_type, __func__); return -EINVAL; } if (id == NULL) { dev_err(dev, "regulator request with no identifier\n"); return -EINVAL; } return 0; } /** * _regulator_get_common - Common code for regulator requests * @rdev: regulator device pointer as returned by *regulator_dev_lookup() * Its reference count is expected to have been incremented. * @dev: device used for dev_printk messages * @id: Supply name or regulator ID * @get_type: enum regulator_get_type value corresponding to type of request * * Returns: pointer to struct regulator corresponding to @rdev, or ERR_PTR() * encoded error. * * This function should be chained with *regulator_dev_lookup() functions. */ struct regulator *_regulator_get_common(struct regulator_dev *rdev, struct device *dev, const char *id, enum regulator_get_type get_type) { struct regulator *regulator; struct device_link *link; int ret; if (IS_ERR(rdev)) { ret = PTR_ERR(rdev); /* * If regulator_dev_lookup() fails with error other * than -ENODEV our job here is done, we simply return it. */ if (ret != -ENODEV) return ERR_PTR(ret); if (!have_full_constraints()) { dev_warn(dev, "incomplete constraints, dummy supplies not allowed (id=%s)\n", id); return ERR_PTR(-ENODEV); } switch (get_type) { case NORMAL_GET: /* * Assume that a regulator is physically present and * enabled, even if it isn't hooked up, and just * provide a dummy. */ rdev = dummy_regulator_rdev; if (!rdev) return ERR_PTR(-EPROBE_DEFER); dev_warn(dev, "supply %s not found, using dummy regulator\n", id); get_device(&rdev->dev); break; case EXCLUSIVE_GET: dev_warn(dev, "dummy supplies not allowed for exclusive requests (id=%s)\n", id); fallthrough; default: return ERR_PTR(-ENODEV); } } if (rdev->exclusive) { regulator = ERR_PTR(-EPERM); put_device(&rdev->dev); return regulator; } if (get_type == EXCLUSIVE_GET && rdev->open_count) { regulator = ERR_PTR(-EBUSY); put_device(&rdev->dev); return regulator; } mutex_lock(®ulator_list_mutex); ret = (rdev->coupling_desc.n_resolved != rdev->coupling_desc.n_coupled); mutex_unlock(®ulator_list_mutex); if (ret != 0) { regulator = ERR_PTR(-EPROBE_DEFER); put_device(&rdev->dev); return regulator; } ret = regulator_resolve_supply(rdev); if (ret < 0) { regulator = ERR_PTR(ret); put_device(&rdev->dev); return regulator; } if (!try_module_get(rdev->owner)) { regulator = ERR_PTR(-EPROBE_DEFER); put_device(&rdev->dev); return regulator; } regulator_lock(rdev); regulator = create_regulator(rdev, dev, id); regulator_unlock(rdev); if (regulator == NULL) { regulator = ERR_PTR(-ENOMEM); module_put(rdev->owner); put_device(&rdev->dev); return regulator; } link_and_create_debugfs(regulator, rdev, dev); rdev->open_count++; if (get_type == EXCLUSIVE_GET) { rdev->exclusive = 1; ret = _regulator_is_enabled(rdev); if (ret > 0) { rdev->use_count = 1; regulator->enable_count = 1; /* Propagate the regulator state to its supply */ if (rdev->supply) { ret = regulator_enable(rdev->supply); if (ret < 0) { destroy_regulator(regulator); module_put(rdev->owner); put_device(&rdev->dev); return ERR_PTR(ret); } } } else { rdev->use_count = 0; regulator->enable_count = 0; } } link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS); if (!IS_ERR_OR_NULL(link)) regulator->device_link = true; return regulator; } /* Internal regulator request function */ struct regulator *_regulator_get(struct device *dev, const char *id, enum regulator_get_type get_type) { struct regulator_dev *rdev; int ret; ret = _regulator_get_common_check(dev, id, get_type); if (ret) return ERR_PTR(ret); rdev = regulator_dev_lookup(dev, id); return _regulator_get_common(rdev, dev, id, get_type); } /** * regulator_get - lookup and obtain a reference to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. * * Return: Pointer to a &struct regulator corresponding to the regulator * producer, or an ERR_PTR() encoded negative error number. */ struct regulator *regulator_get(struct device *dev, const char *id) { return _regulator_get(dev, id, NORMAL_GET); } EXPORT_SYMBOL_GPL(regulator_get); /** * regulator_get_exclusive - obtain exclusive access to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * Other consumers will be unable to obtain this regulator while this * reference is held and the use count for the regulator will be * initialised to reflect the current state of the regulator. * * This is intended for use by consumers which cannot tolerate shared * use of the regulator such as those which need to force the * regulator off for correct operation of the hardware they are * controlling. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. * * Return: Pointer to a &struct regulator corresponding to the regulator * producer, or an ERR_PTR() encoded negative error number. */ struct regulator *regulator_get_exclusive(struct device *dev, const char *id) { return _regulator_get(dev, id, EXCLUSIVE_GET); } EXPORT_SYMBOL_GPL(regulator_get_exclusive); /** * regulator_get_optional - obtain optional access to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * This is intended for use by consumers for devices which can have * some supplies unconnected in normal use, such as some MMC devices. * It can allow the regulator core to provide stub supplies for other * supplies requested using normal regulator_get() calls without * disrupting the operation of drivers that can handle absent * supplies. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. * * Return: Pointer to a &struct regulator corresponding to the regulator * producer, or an ERR_PTR() encoded negative error number. */ struct regulator *regulator_get_optional(struct device *dev, const char *id) { return _regulator_get(dev, id, OPTIONAL_GET); } EXPORT_SYMBOL_GPL(regulator_get_optional); static void destroy_regulator(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; debugfs_remove_recursive(regulator->debugfs); if (regulator->dev) { if (regulator->device_link) device_link_remove(regulator->dev, &rdev->dev); /* remove any sysfs entries */ sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); } regulator_lock(rdev); list_del(®ulator->list); rdev->open_count--; rdev->exclusive = 0; regulator_unlock(rdev); kfree_const(regulator->supply_name); kfree(regulator); } /* regulator_list_mutex lock held by regulator_put() */ static void _regulator_put(struct regulator *regulator) { struct regulator_dev *rdev; if (IS_ERR_OR_NULL(regulator)) return; lockdep_assert_held_once(®ulator_list_mutex); /* Docs say you must disable before calling regulator_put() */ WARN_ON(regulator->enable_count); rdev = regulator->rdev; destroy_regulator(regulator); module_put(rdev->owner); put_device(&rdev->dev); } /** * regulator_put - "free" the regulator source * @regulator: regulator source * * Note: drivers must ensure that all regulator_enable calls made on this * regulator source are balanced by regulator_disable calls prior to calling * this function. */ void regulator_put(struct regulator *regulator) { mutex_lock(®ulator_list_mutex); _regulator_put(regulator); mutex_unlock(®ulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_put); /** * regulator_register_supply_alias - Provide device alias for supply lookup * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * @alias_dev: device that should be used to lookup the supply * @alias_id: Supply name or regulator ID that should be used to lookup the * supply * * All lookups for id on dev will instead be conducted for alias_id on * alias_dev. * * Return: 0 on success or a negative error number on failure. */ int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(dev, id); if (map) return -EEXIST; map = kzalloc(sizeof(struct regulator_supply_alias), GFP_KERNEL); if (!map) return -ENOMEM; map->src_dev = dev; map->src_supply = id; map->alias_dev = alias_dev; map->alias_supply = alias_id; list_add(&map->list, ®ulator_supply_alias_list); pr_info("Adding alias for supply %s,%s -> %s,%s\n", id, dev_name(dev), alias_id, dev_name(alias_dev)); return 0; } EXPORT_SYMBOL_GPL(regulator_register_supply_alias); /** * regulator_unregister_supply_alias - Remove device alias * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * * Remove a lookup alias if one exists for id on dev. */ void regulator_unregister_supply_alias(struct device *dev, const char *id) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(dev, id); if (map) { list_del(&map->list); kfree(map); } } EXPORT_SYMBOL_GPL(regulator_unregister_supply_alias); /** * regulator_bulk_register_supply_alias - register multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @alias_dev: device that should be used to lookup the supply * @alias_id: List of supply names or regulator IDs that should be used to * lookup the supply * @num_id: Number of aliases to register * * This helper function allows drivers to register several supply * aliases in one operation. If any of the aliases cannot be * registered any aliases that were registered will be removed * before returning to the caller. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_register_supply_alias(struct device *dev, const char *const *id, struct device *alias_dev, const char *const *alias_id, int num_id) { int i; int ret; for (i = 0; i < num_id; ++i) { ret = regulator_register_supply_alias(dev, id[i], alias_dev, alias_id[i]); if (ret < 0) goto err; } return 0; err: dev_err(dev, "Failed to create supply alias %s,%s -> %s,%s\n", id[i], dev_name(dev), alias_id[i], dev_name(alias_dev)); while (--i >= 0) regulator_unregister_supply_alias(dev, id[i]); return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_register_supply_alias); /** * regulator_bulk_unregister_supply_alias - unregister multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @num_id: Number of aliases to unregister * * This helper function allows drivers to unregister several supply * aliases in one operation. */ void regulator_bulk_unregister_supply_alias(struct device *dev, const char *const *id, int num_id) { int i; for (i = 0; i < num_id; ++i) regulator_unregister_supply_alias(dev, id[i]); } EXPORT_SYMBOL_GPL(regulator_bulk_unregister_supply_alias); /* Manage enable GPIO list. Same GPIO pin can be shared among regulators */ static int regulator_ena_gpio_request(struct regulator_dev *rdev, const struct regulator_config *config) { struct regulator_enable_gpio *pin, *new_pin; struct gpio_desc *gpiod; gpiod = config->ena_gpiod; new_pin = kzalloc(sizeof(*new_pin), GFP_KERNEL); mutex_lock(®ulator_list_mutex); list_for_each_entry(pin, ®ulator_ena_gpio_list, list) { if (gpiod_is_equal(pin->gpiod, gpiod)) { rdev_dbg(rdev, "GPIO is already used\n"); goto update_ena_gpio_to_rdev; } } if (new_pin == NULL) { mutex_unlock(®ulator_list_mutex); return -ENOMEM; } pin = new_pin; new_pin = NULL; pin->gpiod = gpiod; list_add(&pin->list, ®ulator_ena_gpio_list); update_ena_gpio_to_rdev: pin->request_count++; rdev->ena_pin = pin; mutex_unlock(®ulator_list_mutex); kfree(new_pin); return 0; } static void regulator_ena_gpio_free(struct regulator_dev *rdev) { struct regulator_enable_gpio *pin, *n; if (!rdev->ena_pin) return; /* Free the GPIO only in case of no use */ list_for_each_entry_safe(pin, n, ®ulator_ena_gpio_list, list) { if (pin != rdev->ena_pin) continue; if (--pin->request_count) break; gpiod_put(pin->gpiod); list_del(&pin->list); kfree(pin); break; } rdev->ena_pin = NULL; } /** * regulator_ena_gpio_ctrl - balance enable_count of each GPIO and actual GPIO pin control * @rdev: regulator_dev structure * @enable: enable GPIO at initial use? * * GPIO is enabled in case of initial use. (enable_count is 0) * GPIO is disabled when it is not shared any more. (enable_count <= 1) * * Return: 0 on success or a negative error number on failure. */ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable) { struct regulator_enable_gpio *pin = rdev->ena_pin; if (!pin) return -EINVAL; if (enable) { /* Enable GPIO at initial use */ if (pin->enable_count == 0) gpiod_set_value_cansleep(pin->gpiod, 1); pin->enable_count++; } else { if (pin->enable_count > 1) { pin->enable_count--; return 0; } /* Disable GPIO if not used */ if (pin->enable_count <= 1) { gpiod_set_value_cansleep(pin->gpiod, 0); pin->enable_count = 0; } } return 0; } /** * _regulator_check_status_enabled - check if regulator status can be * interpreted as "regulator is enabled" * @rdev: the regulator device to check * * Return: * * 1 - if status shows regulator is in enabled state * * 0 - if not enabled state * * Error Value - as received from ops->get_status() */ static inline int _regulator_check_status_enabled(struct regulator_dev *rdev) { int ret = rdev->desc->ops->get_status(rdev); if (ret < 0) { rdev_info(rdev, "get_status returned error: %d\n", ret); return ret; } switch (ret) { case REGULATOR_STATUS_OFF: case REGULATOR_STATUS_ERROR: case REGULATOR_STATUS_UNDEFINED: return 0; default: return 1; } } static int _regulator_do_enable(struct regulator_dev *rdev) { int ret, delay; /* Query before enabling in case configuration dependent. */ ret = _regulator_get_enable_time(rdev); if (ret >= 0) { delay = ret; } else { rdev_warn(rdev, "enable_time() failed: %pe\n", ERR_PTR(ret)); delay = 0; } trace_regulator_enable(rdev_get_name(rdev)); if (rdev->desc->off_on_delay) { /* if needed, keep a distance of off_on_delay from last time * this regulator was disabled. */ ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay); s64 remaining = ktime_us_delta(end, ktime_get_boottime()); if (remaining > 0) fsleep(remaining); } if (rdev->ena_pin) { if (!rdev->ena_gpio_state) { ret = regulator_ena_gpio_ctrl(rdev, true); if (ret < 0) return ret; rdev->ena_gpio_state = 1; } } else if (rdev->desc->ops->enable) { ret = rdev->desc->ops->enable(rdev); if (ret < 0) return ret; } else { return -EINVAL; } /* Allow the regulator to ramp; it would be useful to extend * this for bulk operations so that the regulators can ramp * together. */ trace_regulator_enable_delay(rdev_get_name(rdev)); /* If poll_enabled_time is set, poll upto the delay calculated * above, delaying poll_enabled_time uS to check if the regulator * actually got enabled. * If the regulator isn't enabled after our delay helper has expired, * return -ETIMEDOUT. */ if (rdev->desc->poll_enabled_time) { int time_remaining = delay; while (time_remaining > 0) { fsleep(rdev->desc->poll_enabled_time); if (rdev->desc->ops->get_status) { ret = _regulator_check_status_enabled(rdev); if (ret < 0) return ret; else if (ret) break; } else if (rdev->desc->ops->is_enabled(rdev)) break; time_remaining -= rdev->desc->poll_enabled_time; } if (time_remaining <= 0) { rdev_err(rdev, "Enabled check timed out\n"); return -ETIMEDOUT; } } else { fsleep(delay); } trace_regulator_enable_complete(rdev_get_name(rdev)); return 0; } /** * _regulator_handle_consumer_enable - handle that a consumer enabled * @regulator: regulator source * * Some things on a regulator consumer (like the contribution towards total * load on the regulator) only have an effect when the consumer wants the * regulator enabled. Explained in example with two consumers of the same * regulator: * consumer A: set_load(100); => total load = 0 * consumer A: regulator_enable(); => total load = 100 * consumer B: set_load(1000); => total load = 100 * consumer B: regulator_enable(); => total load = 1100 * consumer A: regulator_disable(); => total_load = 1000 * * This function (together with _regulator_handle_consumer_disable) is * responsible for keeping track of the refcount for a given regulator consumer * and applying / unapplying these things. * * Return: 0 on success or negative error number on failure. */ static int _regulator_handle_consumer_enable(struct regulator *regulator) { int ret; struct regulator_dev *rdev = regulator->rdev; lockdep_assert_held_once(&rdev->mutex.base); regulator->enable_count++; if (regulator->uA_load && regulator->enable_count == 1) { ret = drms_uA_update(rdev); if (ret) regulator->enable_count--; return ret; } return 0; } /** * _regulator_handle_consumer_disable - handle that a consumer disabled * @regulator: regulator source * * The opposite of _regulator_handle_consumer_enable(). * * Return: 0 on success or a negative error number on failure. */ static int _regulator_handle_consumer_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; lockdep_assert_held_once(&rdev->mutex.base); if (!regulator->enable_count) { rdev_err(rdev, "Underflow of regulator enable count\n"); return -EINVAL; } regulator->enable_count--; if (regulator->uA_load && regulator->enable_count == 0) return drms_uA_update(rdev); return 0; } /* locks held by regulator_enable() */ static int _regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; int ret; lockdep_assert_held_once(&rdev->mutex.base); if (rdev->use_count == 0 && rdev->supply) { ret = _regulator_enable(rdev->supply); if (ret < 0) return ret; } /* balance only if there are regulators coupled */ if (rdev->coupling_desc.n_coupled > 1) { ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (ret < 0) goto err_disable_supply; } ret = _regulator_handle_consumer_enable(regulator); if (ret < 0) goto err_disable_supply; if (rdev->use_count == 0) { /* * The regulator may already be enabled if it's not switchable * or was left on */ ret = _regulator_is_enabled(rdev); if (ret == -EINVAL || ret == 0) { if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { ret = -EPERM; goto err_consumer_disable; } ret = _regulator_do_enable(rdev); if (ret < 0) goto err_consumer_disable; _notifier_call_chain(rdev, REGULATOR_EVENT_ENABLE, NULL); } else if (ret < 0) { rdev_err(rdev, "is_enabled() failed: %pe\n", ERR_PTR(ret)); goto err_consumer_disable; } /* Fallthrough on positive return values - already enabled */ } if (regulator->enable_count == 1) rdev->use_count++; return 0; err_consumer_disable: _regulator_handle_consumer_disable(regulator); err_disable_supply: if (rdev->use_count == 0 && rdev->supply) _regulator_disable(rdev->supply); return ret; } /** * regulator_enable - enable regulator output * @regulator: regulator source * * Request that the regulator be enabled with the regulator output at * the predefined voltage or current value. Calls to regulator_enable() * must be balanced with calls to regulator_disable(). * * NOTE: the output value can be set by other drivers, boot loader or may be * hardwired in the regulator. * * Return: 0 on success or a negative error number on failure. */ int regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_enable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_enable); static int _regulator_do_disable(struct regulator_dev *rdev) { int ret; trace_regulator_disable(rdev_get_name(rdev)); if (rdev->ena_pin) { if (rdev->ena_gpio_state) { ret = regulator_ena_gpio_ctrl(rdev, false); if (ret < 0) return ret; rdev->ena_gpio_state = 0; } } else if (rdev->desc->ops->disable) { ret = rdev->desc->ops->disable(rdev); if (ret != 0) return ret; } if (rdev->desc->off_on_delay) rdev->last_off = ktime_get_boottime(); trace_regulator_disable_complete(rdev_get_name(rdev)); return 0; } /* locks held by regulator_disable() */ static int _regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); if (WARN(regulator->enable_count == 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) return -EIO; if (regulator->enable_count == 1) { /* disabling last enable_count from this regulator */ /* are we the last user and permitted to disable ? */ if (rdev->use_count == 1 && (rdev->constraints && !rdev->constraints->always_on)) { /* we are last user */ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_DISABLE, NULL); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = _regulator_do_disable(rdev); if (ret < 0) { rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_DISABLE, NULL); return ret; } _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, NULL); } rdev->use_count = 0; } else if (rdev->use_count > 1) { rdev->use_count--; } } if (ret == 0) ret = _regulator_handle_consumer_disable(regulator); if (ret == 0 && rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (ret == 0 && rdev->use_count == 0 && rdev->supply) ret = _regulator_disable(rdev->supply); return ret; } /** * regulator_disable - disable regulator output * @regulator: regulator source * * Disable the regulator output voltage or current. Calls to * regulator_enable() must be balanced with calls to * regulator_disable(). * * NOTE: this will only disable the regulator output if no other consumer * devices have it enabled, the regulator device supports disabling and * machine constraints permit this operation. * * Return: 0 on success or a negative error number on failure. */ int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_disable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); ret = _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_PRE_DISABLE, NULL); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = _regulator_do_disable(rdev); if (ret < 0) { rdev_err(rdev, "failed to force disable: %pe\n", ERR_PTR(ret)); _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_ABORT_DISABLE, NULL); return ret; } _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_DISABLE, NULL); return 0; } /** * regulator_force_disable - force disable regulator output * @regulator: regulator source * * Forcibly disable the regulator output voltage or current. * NOTE: this *will* disable the regulator output even if other consumer * devices have it enabled. This should be used for situations when device * damage will likely occur if the regulator is not disabled (e.g. over temp). * * Return: 0 on success or a negative error number on failure. */ int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_force_disable(regulator->rdev); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (regulator->uA_load) { regulator->uA_load = 0; ret = drms_uA_update(rdev); } if (rdev->use_count != 0 && rdev->supply) _regulator_disable(rdev->supply); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_force_disable); static void regulator_disable_work(struct work_struct *work) { struct regulator_dev *rdev = container_of(work, struct regulator_dev, disable_work.work); struct ww_acquire_ctx ww_ctx; int count, i, ret; struct regulator *regulator; int total_count = 0; regulator_lock_dependent(rdev, &ww_ctx); /* * Workqueue functions queue the new work instance while the previous * work instance is being processed. Cancel the queued work instance * as the work instance under processing does the job of the queued * work instance. */ cancel_delayed_work(&rdev->disable_work); list_for_each_entry(regulator, &rdev->consumer_list, list) { count = regulator->deferred_disables; if (!count) continue; total_count += count; regulator->deferred_disables = 0; for (i = 0; i < count; i++) { ret = _regulator_disable(regulator); if (ret != 0) rdev_err(rdev, "Deferred disable failed: %pe\n", ERR_PTR(ret)); } } WARN_ON(!total_count); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); regulator_unlock_dependent(rdev, &ww_ctx); } /** * regulator_disable_deferred - disable regulator output with delay * @regulator: regulator source * @ms: milliseconds until the regulator is disabled * * Execute regulator_disable() on the regulator after a delay. This * is intended for use with devices that require some time to quiesce. * * NOTE: this will only disable the regulator output if no other consumer * devices have it enabled, the regulator device supports disabling and * machine constraints permit this operation. * * Return: 0 on success or a negative error number on failure. */ int regulator_disable_deferred(struct regulator *regulator, int ms) { struct regulator_dev *rdev = regulator->rdev; if (!ms) return regulator_disable(regulator); regulator_lock(rdev); regulator->deferred_disables++; mod_delayed_work(system_power_efficient_wq, &rdev->disable_work, msecs_to_jiffies(ms)); regulator_unlock(rdev); return 0; } EXPORT_SYMBOL_GPL(regulator_disable_deferred); static int _regulator_is_enabled(struct regulator_dev *rdev) { /* A GPIO control always takes precedence */ if (rdev->ena_pin) return rdev->ena_gpio_state; /* If we don't know then assume that the regulator is always on */ if (!rdev->desc->ops->is_enabled) return 1; return rdev->desc->ops->is_enabled(rdev); } static int _regulator_list_voltage(struct regulator_dev *rdev, unsigned selector, int lock) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; if (ops->list_voltage) { if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (lock) regulator_lock(rdev); ret = ops->list_voltage(rdev, selector); if (lock) regulator_unlock(rdev); } else if (rdev->is_switch && rdev->supply) { ret = _regulator_list_voltage(rdev->supply->rdev, selector, lock); } else { return -EINVAL; } if (ret > 0) { if (ret < rdev->constraints->min_uV) ret = 0; else if (ret > rdev->constraints->max_uV) ret = 0; } return ret; } /** * regulator_is_enabled - is the regulator output enabled * @regulator: regulator source * * Note that the device backing this regulator handle can have multiple * users, so it might be enabled even if regulator_enable() was never * called for this particular source. * * Return: Positive if the regulator driver backing the source/client * has requested that the device be enabled, zero if it hasn't, * else a negative error number. */ int regulator_is_enabled(struct regulator *regulator) { int ret; if (regulator->always_on) return 1; regulator_lock(regulator->rdev); ret = _regulator_is_enabled(regulator->rdev); regulator_unlock(regulator->rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_is_enabled); /** * regulator_count_voltages - count regulator_list_voltage() selectors * @regulator: regulator source * * Return: Number of selectors for @regulator, or negative error number. * * Selectors are numbered starting at zero, and typically correspond to * bitfields in hardware registers. */ int regulator_count_voltages(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; if (rdev->desc->n_voltages) return rdev->desc->n_voltages; if (!rdev->is_switch || !rdev->supply) return -EINVAL; return regulator_count_voltages(rdev->supply); } EXPORT_SYMBOL_GPL(regulator_count_voltages); /** * regulator_list_voltage - enumerate supported voltages * @regulator: regulator source * @selector: identify voltage to list * Context: can sleep * * Return: Voltage for @selector that can be passed to regulator_set_voltage(), * 0 if @selector can't be used on this system, or a negative error * number on failure. */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { return _regulator_list_voltage(regulator->rdev, selector, 1); } EXPORT_SYMBOL_GPL(regulator_list_voltage); /** * regulator_get_regmap - get the regulator's register map * @regulator: regulator source * * Return: Pointer to the &struct regmap for @regulator, or ERR_PTR() * encoded -%EOPNOTSUPP if @regulator doesn't use regmap. */ struct regmap *regulator_get_regmap(struct regulator *regulator) { struct regmap *map = regulator->rdev->regmap; return map ? map : ERR_PTR(-EOPNOTSUPP); } EXPORT_SYMBOL_GPL(regulator_get_regmap); /** * regulator_get_hardware_vsel_register - get the HW voltage selector register * @regulator: regulator source * @vsel_reg: voltage selector register, output parameter * @vsel_mask: mask for voltage selector bitfield, output parameter * * Returns the hardware register offset and bitmask used for setting the * regulator voltage. This might be useful when configuring voltage-scaling * hardware or firmware that can make I2C requests behind the kernel's back, * for example. * * Return: 0 on success, or -%EOPNOTSUPP if the regulator does not support * voltage selectors. * * On success, the output parameters @vsel_reg and @vsel_mask are filled in * and 0 is returned, otherwise a negative error number is returned. */ int regulator_get_hardware_vsel_register(struct regulator *regulator, unsigned *vsel_reg, unsigned *vsel_mask) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; *vsel_reg = rdev->desc->vsel_reg; *vsel_mask = rdev->desc->vsel_mask; return 0; } EXPORT_SYMBOL_GPL(regulator_get_hardware_vsel_register); /** * regulator_list_hardware_vsel - get the HW-specific register value for a selector * @regulator: regulator source * @selector: identify voltage to list * * Converts the selector to a hardware-specific voltage selector that can be * directly written to the regulator registers. The address of the voltage * register can be determined by calling @regulator_get_hardware_vsel_register. * * Return: 0 on success, -%EINVAL if the selector is outside the supported * range, or -%EOPNOTSUPP if the regulator does not support voltage * selectors. */ int regulator_list_hardware_vsel(struct regulator *regulator, unsigned selector) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; return selector; } EXPORT_SYMBOL_GPL(regulator_list_hardware_vsel); /** * regulator_hardware_enable - access the HW for enable/disable regulator * @regulator: regulator source * @enable: true for enable, false for disable * * Request that the regulator be enabled/disabled with the regulator output at * the predefined voltage or current value. * * Return: 0 on success or a negative error number on failure. */ int regulator_hardware_enable(struct regulator *regulator, bool enable) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int ret = -EOPNOTSUPP; if (!rdev->exclusive || !ops || !ops->enable || !ops->disable) return ret; if (enable) ret = ops->enable(rdev); else ret = ops->disable(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_hardware_enable); /** * regulator_get_linear_step - return the voltage step size between VSEL values * @regulator: regulator source * * Return: The voltage step size between VSEL values for linear regulators, * or 0 if the regulator isn't a linear regulator. */ unsigned int regulator_get_linear_step(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; return rdev->desc->uV_step; } EXPORT_SYMBOL_GPL(regulator_get_linear_step); /** * regulator_is_supported_voltage - check if a voltage range can be supported * * @regulator: Regulator to check. * @min_uV: Minimum required voltage in uV. * @max_uV: Maximum required voltage in uV. * * Return: 1 if the voltage range is supported, 0 if not, or a negative error * number if @regulator's voltage can't be changed and voltage readback * failed. */ int regulator_is_supported_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct regulator_dev *rdev = regulator->rdev; int i, voltages, ret; /* If we can't change voltage check the current voltage */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { ret = regulator_get_voltage(regulator); if (ret >= 0) return min_uV <= ret && ret <= max_uV; else return ret; } /* Any voltage within constrains range is fine? */ if (rdev->desc->continuous_voltage_range) return min_uV >= rdev->constraints->min_uV && max_uV <= rdev->constraints->max_uV; ret = regulator_count_voltages(regulator); if (ret < 0) return 0; voltages = ret; for (i = 0; i < voltages; i++) { ret = regulator_list_voltage(regulator, i); if (ret >= min_uV && ret <= max_uV) return 1; } return 0; } EXPORT_SYMBOL_GPL(regulator_is_supported_voltage); static int regulator_map_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { const struct regulator_desc *desc = rdev->desc; if (desc->ops->map_voltage) return desc->ops->map_voltage(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_linear) return regulator_map_voltage_linear(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_linear_range) return regulator_map_voltage_linear_range(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_pickable_linear_range) return regulator_map_voltage_pickable_linear_range(rdev, min_uV, max_uV); return regulator_map_voltage_iterate(rdev, min_uV, max_uV); } static int _regulator_call_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, unsigned *selector) { struct pre_voltage_change_data data; int ret; data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = min_uV; data.max_uV = max_uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, &data); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector); if (ret >= 0) return ret; _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, (void *)data.old_uV); return ret; } static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, int uV, unsigned selector) { struct pre_voltage_change_data data; int ret; data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = uV; data.max_uV = uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, &data); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = rdev->desc->ops->set_voltage_sel(rdev, selector); if (ret >= 0) return ret; _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, (void *)data.old_uV); return ret; } static int _regulator_set_voltage_sel_step(struct regulator_dev *rdev, int uV, int new_selector) { const struct regulator_ops *ops = rdev->desc->ops; int diff, old_sel, curr_sel, ret; /* Stepping is only needed if the regulator is enabled. */ if (!_regulator_is_enabled(rdev)) goto final_set; if (!ops->get_voltage_sel) return -EINVAL; old_sel = ops->get_voltage_sel(rdev); if (old_sel < 0) return old_sel; diff = new_selector - old_sel; if (diff == 0) return 0; /* No change needed. */ if (diff > 0) { /* Stepping up. */ for (curr_sel = old_sel + rdev->desc->vsel_step; curr_sel < new_selector; curr_sel += rdev->desc->vsel_step) { /* * Call the callback directly instead of using * _regulator_call_set_voltage_sel() as we don't * want to notify anyone yet. Same in the branch * below. */ ret = ops->set_voltage_sel(rdev, curr_sel); if (ret) goto try_revert; } } else { /* Stepping down. */ for (curr_sel = old_sel - rdev->desc->vsel_step; curr_sel > new_selector; curr_sel -= rdev->desc->vsel_step) { ret = ops->set_voltage_sel(rdev, curr_sel); if (ret) goto try_revert; } } final_set: /* The final selector will trigger the notifiers. */ return _regulator_call_set_voltage_sel(rdev, uV, new_selector); try_revert: /* * At least try to return to the previous voltage if setting a new * one failed. */ (void)ops->set_voltage_sel(rdev, old_sel); return ret; } static int _regulator_set_voltage_time(struct regulator_dev *rdev, int old_uV, int new_uV) { unsigned int ramp_delay = 0; if (rdev->constraints->ramp_delay) ramp_delay = rdev->constraints->ramp_delay; else if (rdev->desc->ramp_delay) ramp_delay = rdev->desc->ramp_delay; else if (rdev->constraints->settling_time) return rdev->constraints->settling_time; else if (rdev->constraints->settling_time_up && (new_uV > old_uV)) return rdev->constraints->settling_time_up; else if (rdev->constraints->settling_time_down && (new_uV < old_uV)) return rdev->constraints->settling_time_down; if (ramp_delay == 0) return 0; return DIV_ROUND_UP(abs(new_uV - old_uV), ramp_delay); } static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { int ret; int delay = 0; int best_val = 0; unsigned int selector; int old_selector = -1; const struct regulator_ops *ops = rdev->desc->ops; int old_uV = regulator_get_voltage_rdev(rdev); trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV); min_uV += rdev->constraints->uV_offset; max_uV += rdev->constraints->uV_offset; /* * If we can't obtain the old selector there is not enough * info to call set_voltage_time_sel(). */ if (_regulator_is_enabled(rdev) && ops->set_voltage_time_sel && ops->get_voltage_sel) { old_selector = ops->get_voltage_sel(rdev); if (old_selector < 0) return old_selector; } if (ops->set_voltage) { ret = _regulator_call_set_voltage(rdev, min_uV, max_uV, &selector); if (ret >= 0) { if (ops->list_voltage) best_val = ops->list_voltage(rdev, selector); else best_val = regulator_get_voltage_rdev(rdev); } } else if (ops->set_voltage_sel) { ret = regulator_map_voltage(rdev, min_uV, max_uV); if (ret >= 0) { best_val = ops->list_voltage(rdev, ret); if (min_uV <= best_val && max_uV >= best_val) { selector = ret; if (old_selector == selector) ret = 0; else if (rdev->desc->vsel_step) ret = _regulator_set_voltage_sel_step( rdev, best_val, selector); else ret = _regulator_call_set_voltage_sel( rdev, best_val, selector); } else { ret = -EINVAL; } } } else { ret = -EINVAL; } if (ret) goto out; if (ops->set_voltage_time_sel) { /* * Call set_voltage_time_sel if successfully obtained * old_selector */ if (old_selector >= 0 && old_selector != selector) delay = ops->set_voltage_time_sel(rdev, old_selector, selector); } else { if (old_uV != best_val) { if (ops->set_voltage_time) delay = ops->set_voltage_time(rdev, old_uV, best_val); else delay = _regulator_set_voltage_time(rdev, old_uV, best_val); } } if (delay < 0) { rdev_warn(rdev, "failed to get delay: %pe\n", ERR_PTR(delay)); delay = 0; } /* Insert any necessary delays */ fsleep(delay); if (best_val >= 0) { unsigned long data = best_val; _notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE, (void *)data); } out: trace_regulator_set_voltage_complete(rdev_get_name(rdev), best_val); return ret; } static int _regulator_do_set_suspend_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, suspend_state_t state) { struct regulator_state *rstate; int uV, sel; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (min_uV < rstate->min_uV) min_uV = rstate->min_uV; if (max_uV > rstate->max_uV) max_uV = rstate->max_uV; sel = regulator_map_voltage(rdev, min_uV, max_uV); if (sel < 0) return sel; uV = rdev->desc->ops->list_voltage(rdev, sel); if (uV >= min_uV && uV <= max_uV) rstate->uV = uV; return 0; } static int regulator_get_voltage_delta(struct regulator_dev *rdev, int uV) { int current_uV = regulator_get_voltage_rdev(rdev); if (current_uV < 0) return current_uV; return abs(current_uV - uV); } static int regulator_set_voltage_unlocked(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; struct regulator_voltage *voltage = ®ulator->voltage[state]; int ret = 0; int current_uV, delta, new_delta; int old_min_uV, old_max_uV; /* If we're setting the same range as last time the change * should be a noop (some cpufreq implementations use the same * voltage for multiple frequencies, for example). */ if (voltage->min_uV == min_uV && voltage->max_uV == max_uV) goto out; /* If we're trying to set a range that overlaps the current voltage, * return successfully even though the regulator does not support * changing the voltage. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { current_uV = regulator_get_voltage_rdev(rdev); if (min_uV <= current_uV && current_uV <= max_uV) { voltage->min_uV = min_uV; voltage->max_uV = max_uV; goto out; } } /* sanity check */ if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* constraints check */ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; /* restore original values in case of error */ old_min_uV = voltage->min_uV; old_max_uV = voltage->max_uV; voltage->min_uV = min_uV; voltage->max_uV = max_uV; /* for not coupled regulators this will just set the voltage */ ret = regulator_balance_voltage(rdev, state); if (ret < 0) { voltage->min_uV = old_min_uV; voltage->max_uV = old_max_uV; } if (rdev->constraints->max_uV_step > 0) { /* For regulators with a maximum voltage step, reaching the desired * voltage might take a few retries. */ ret = regulator_get_voltage_delta(rdev, min_uV); if (ret < 0) goto out; delta = ret; while (delta > 0) { ret = regulator_balance_voltage(rdev, state); if (ret < 0) goto out; ret = regulator_get_voltage_delta(rdev, min_uV); if (ret < 0) goto out; new_delta = ret; /* check that voltage is converging quickly enough */ if (delta - new_delta < rdev->constraints->max_uV_step) { ret = -EWOULDBLOCK; goto out; } delta = new_delta; } } out: return ret; } int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, int max_uV, suspend_state_t state) { int best_supply_uV = 0; int supply_change_uV = 0; int ret; if (rdev->supply && regulator_ops_is_valid(rdev->supply->rdev, REGULATOR_CHANGE_VOLTAGE) && (rdev->desc->min_dropout_uV || !(rdev->desc->ops->get_voltage || rdev->desc->ops->get_voltage_sel))) { int current_supply_uV; int selector; selector = regulator_map_voltage(rdev, min_uV, max_uV); if (selector < 0) { ret = selector; goto out; } best_supply_uV = _regulator_list_voltage(rdev, selector, 0); if (best_supply_uV < 0) { ret = best_supply_uV; goto out; } best_supply_uV += rdev->desc->min_dropout_uV; current_supply_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (current_supply_uV < 0) { ret = current_supply_uV; goto out; } supply_change_uV = best_supply_uV - current_supply_uV; } if (supply_change_uV > 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) { dev_err(&rdev->dev, "Failed to increase supply voltage: %pe\n", ERR_PTR(ret)); goto out; } } if (state == PM_SUSPEND_ON) ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); else ret = _regulator_do_set_suspend_voltage(rdev, min_uV, max_uV, state); if (ret < 0) goto out; if (supply_change_uV < 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) dev_warn(&rdev->dev, "Failed to decrease supply voltage: %pe\n", ERR_PTR(ret)); /* No need to fail here */ ret = 0; } out: return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage_rdev); static int regulator_limit_voltage_step(struct regulator_dev *rdev, int *current_uV, int *min_uV) { struct regulation_constraints *constraints = rdev->constraints; /* Limit voltage change only if necessary */ if (!constraints->max_uV_step || !_regulator_is_enabled(rdev)) return 1; if (*current_uV < 0) { *current_uV = regulator_get_voltage_rdev(rdev); if (*current_uV < 0) return *current_uV; } if (abs(*current_uV - *min_uV) <= constraints->max_uV_step) return 1; /* Clamp target voltage within the given step */ if (*current_uV < *min_uV) *min_uV = min(*current_uV + constraints->max_uV_step, *min_uV); else *min_uV = max(*current_uV - constraints->max_uV_step, *min_uV); return 0; } static int regulator_get_optimal_voltage(struct regulator_dev *rdev, int *current_uV, int *min_uV, int *max_uV, suspend_state_t state, int n_coupled) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_dev **c_rdevs = c_desc->coupled_rdevs; struct regulation_constraints *constraints = rdev->constraints; int desired_min_uV = 0, desired_max_uV = INT_MAX; int max_current_uV = 0, min_current_uV = INT_MAX; int highest_min_uV = 0, target_uV, possible_uV; int i, ret, max_spread; bool done; *current_uV = -1; /* * If there are no coupled regulators, simply set the voltage * demanded by consumers. */ if (n_coupled == 1) { /* * If consumers don't provide any demands, set voltage * to min_uV */ desired_min_uV = constraints->min_uV; desired_max_uV = constraints->max_uV; ret = regulator_check_consumers(rdev, &desired_min_uV, &desired_max_uV, state); if (ret < 0) return ret; done = true; goto finish; } /* Find highest min desired voltage */ for (i = 0; i < n_coupled; i++) { int tmp_min = 0; int tmp_max = INT_MAX; lockdep_assert_held_once(&c_rdevs[i]->mutex.base); ret = regulator_check_consumers(c_rdevs[i], &tmp_min, &tmp_max, state); if (ret < 0) return ret; ret = regulator_check_voltage(c_rdevs[i], &tmp_min, &tmp_max); if (ret < 0) return ret; highest_min_uV = max(highest_min_uV, tmp_min); if (i == 0) { desired_min_uV = tmp_min; desired_max_uV = tmp_max; } } max_spread = constraints->max_spread[0]; /* * Let target_uV be equal to the desired one if possible. * If not, set it to minimum voltage, allowed by other coupled * regulators. */ target_uV = max(desired_min_uV, highest_min_uV - max_spread); /* * Find min and max voltages, which currently aren't violating * max_spread. */ for (i = 1; i < n_coupled; i++) { int tmp_act; if (!_regulator_is_enabled(c_rdevs[i])) continue; tmp_act = regulator_get_voltage_rdev(c_rdevs[i]); if (tmp_act < 0) return tmp_act; min_current_uV = min(tmp_act, min_current_uV); max_current_uV = max(tmp_act, max_current_uV); } /* There aren't any other regulators enabled */ if (max_current_uV == 0) { possible_uV = target_uV; } else { /* * Correct target voltage, so as it currently isn't * violating max_spread */ possible_uV = max(target_uV, max_current_uV - max_spread); possible_uV = min(possible_uV, min_current_uV + max_spread); } if (possible_uV > desired_max_uV) return -EINVAL; done = (possible_uV == target_uV); desired_min_uV = possible_uV; finish: /* Apply max_uV_step constraint if necessary */ if (state == PM_SUSPEND_ON) { ret = regulator_limit_voltage_step(rdev, current_uV, &desired_min_uV); if (ret < 0) return ret; if (ret == 0) done = false; } /* Set current_uV if wasn't done earlier in the code and if necessary */ if (n_coupled > 1 && *current_uV == -1) { if (_regulator_is_enabled(rdev)) { ret = regulator_get_voltage_rdev(rdev); if (ret < 0) return ret; *current_uV = ret; } else { *current_uV = desired_min_uV; } } *min_uV = desired_min_uV; *max_uV = desired_max_uV; return done; } int regulator_do_balance_voltage(struct regulator_dev *rdev, suspend_state_t state, bool skip_coupled) { struct regulator_dev **c_rdevs; struct regulator_dev *best_rdev; struct coupling_desc *c_desc = &rdev->coupling_desc; int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev; unsigned int delta, best_delta; unsigned long c_rdev_done = 0; bool best_c_rdev_done; c_rdevs = c_desc->coupled_rdevs; n_coupled = skip_coupled ? 1 : c_desc->n_coupled; /* * Find the best possible voltage change on each loop. Leave the loop * if there isn't any possible change. */ do { best_c_rdev_done = false; best_delta = 0; best_min_uV = 0; best_max_uV = 0; best_c_rdev = 0; best_rdev = NULL; /* * Find highest difference between optimal voltage * and current voltage. */ for (i = 0; i < n_coupled; i++) { /* * optimal_uV is the best voltage that can be set for * i-th regulator at the moment without violating * max_spread constraint in order to balance * the coupled voltages. */ int optimal_uV = 0, optimal_max_uV = 0, current_uV = 0; if (test_bit(i, &c_rdev_done)) continue; ret = regulator_get_optimal_voltage(c_rdevs[i], ¤t_uV, &optimal_uV, &optimal_max_uV, state, n_coupled); if (ret < 0) goto out; delta = abs(optimal_uV - current_uV); if (delta && best_delta <= delta) { best_c_rdev_done = ret; best_delta = delta; best_rdev = c_rdevs[i]; best_min_uV = optimal_uV; best_max_uV = optimal_max_uV; best_c_rdev = i; } } /* Nothing to change, return successfully */ if (!best_rdev) { ret = 0; goto out; } ret = regulator_set_voltage_rdev(best_rdev, best_min_uV, best_max_uV, state); if (ret < 0) goto out; if (best_c_rdev_done) set_bit(best_c_rdev, &c_rdev_done); } while (n_coupled > 1); out: return ret; } static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_coupler *coupler = c_desc->coupler; bool skip_coupled = false; /* * If system is in a state other than PM_SUSPEND_ON, don't check * other coupled regulators. */ if (state != PM_SUSPEND_ON) skip_coupled = true; if (c_desc->n_resolved < c_desc->n_coupled) { rdev_err(rdev, "Not all coupled regulators registered\n"); return -EPERM; } /* Invoke custom balancer for customized couplers */ if (coupler && coupler->balance_voltage) return coupler->balance_voltage(coupler, rdev, state); return regulator_do_balance_voltage(rdev, state, skip_coupled); } /** * regulator_set_voltage - set regulator output voltage * @regulator: regulator source * @min_uV: Minimum required voltage in uV * @max_uV: Maximum acceptable voltage in uV * * Sets a voltage regulator to the desired output voltage. This can be set * during any regulator state. IOW, regulator can be disabled or enabled. * * If the regulator is enabled then the voltage will change to the new value * immediately otherwise if the regulator is disabled the regulator will * output at the new voltage when enabled. * * NOTE: If the regulator is shared between several devices then the lowest * request voltage that meets the system constraints will be used. * Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV, PM_SUSPEND_ON); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage); static inline int regulator_suspend_toggle(struct regulator_dev *rdev, suspend_state_t state, bool en) { struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (!rstate->changeable) return -EPERM; rstate->enabled = (en) ? ENABLE_IN_SUSPEND : DISABLE_IN_SUSPEND; return 0; } int regulator_suspend_enable(struct regulator_dev *rdev, suspend_state_t state) { return regulator_suspend_toggle(rdev, state, true); } EXPORT_SYMBOL_GPL(regulator_suspend_enable); int regulator_suspend_disable(struct regulator_dev *rdev, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; /* * if any consumer wants this regulator device keeping on in * suspend states, don't set it as disabled. */ list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = ®ulator->voltage[state]; if (voltage->min_uV || voltage->max_uV) return 0; } return regulator_suspend_toggle(rdev, state, false); } EXPORT_SYMBOL_GPL(regulator_suspend_disable); static int _regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (rstate->min_uV == rstate->max_uV) { rdev_err(rdev, "The suspend voltage can't be changed!\n"); return -EPERM; } return regulator_set_voltage_unlocked(regulator, min_uV, max_uV, state); } int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct ww_acquire_ctx ww_ctx; int ret; /* PM_SUSPEND_ON is handled by regulator_set_voltage() */ if (regulator_check_states(state) || state == PM_SUSPEND_ON) return -EINVAL; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = _regulator_set_suspend_voltage(regulator, min_uV, max_uV, state); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_suspend_voltage); /** * regulator_set_voltage_time - get raise/fall time * @regulator: regulator source * @old_uV: starting voltage in microvolts * @new_uV: target voltage in microvolts * * Provided with the starting and ending voltage, this function attempts to * calculate the time in microseconds required to rise or fall to this new * voltage. * * Return: ramp time in microseconds, or a negative error number if calculation failed. */ int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; int i; if (ops->set_voltage_time) return ops->set_voltage_time(rdev, old_uV, new_uV); else if (!ops->set_voltage_time_sel) return _regulator_set_voltage_time(rdev, old_uV, new_uV); /* Currently requires operations to do this */ if (!ops->list_voltage || !rdev->desc->n_voltages) return -EINVAL; for (i = 0; i < rdev->desc->n_voltages; i++) { /* We only look for exact voltage matches here */ if (i < rdev->desc->linear_min_sel) continue; if (old_sel >= 0 && new_sel >= 0) break; voltage = regulator_list_voltage(regulator, i); if (voltage < 0) return -EINVAL; if (voltage == 0) continue; if (voltage == old_uV) old_sel = i; if (voltage == new_uV) new_sel = i; } if (old_sel < 0 || new_sel < 0) return -EINVAL; return ops->set_voltage_time_sel(rdev, old_sel, new_sel); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time); /** * regulator_set_voltage_time_sel - get raise/fall time * @rdev: regulator source device * @old_selector: selector for starting voltage * @new_selector: selector for target voltage * * Provided with the starting and target voltage selectors, this function * returns time in microseconds required to rise or fall to this new voltage * * Drivers providing ramp_delay in regulation_constraints can use this as their * set_voltage_time_sel() operation. * * Return: ramp time in microseconds, or a negative error number if calculation failed. */ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int old_selector, unsigned int new_selector) { int old_volt, new_volt; /* sanity check */ if (!rdev->desc->ops->list_voltage) return -EINVAL; old_volt = rdev->desc->ops->list_voltage(rdev, old_selector); new_volt = rdev->desc->ops->list_voltage(rdev, new_selector); if (rdev->desc->ops->set_voltage_time) return rdev->desc->ops->set_voltage_time(rdev, old_volt, new_volt); else return _regulator_set_voltage_time(rdev, old_volt, new_volt); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel); int regulator_sync_voltage_rdev(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = -EOPNOTSUPP; out: regulator_unlock(rdev); return ret; } /** * regulator_sync_voltage - re-apply last regulator output voltage * @regulator: regulator source * * Re-apply the last configured voltage. This is intended to be used * where some external control source the consumer is cooperating with * has caused the configured voltage to change. * * Return: 0 on success or a negative error number on failure. */ int regulator_sync_voltage(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct regulator_voltage *voltage = ®ulator->voltage[PM_SUSPEND_ON]; int ret, min_uV, max_uV; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) return 0; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* This is only going to work if we've had a voltage configured. */ if (!voltage->min_uV && !voltage->max_uV) { ret = -EINVAL; goto out; } min_uV = voltage->min_uV; max_uV = voltage->max_uV; /* This should be a paranoia check... */ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; ret = regulator_check_consumers(rdev, &min_uV, &max_uV, 0); if (ret < 0) goto out; /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_sync_voltage); int regulator_get_voltage_rdev(struct regulator_dev *rdev) { int sel, ret; bool bypassed; if (rdev->desc->ops->get_bypass) { ret = rdev->desc->ops->get_bypass(rdev, &bypassed); if (ret < 0) return ret; if (bypassed) { /* if bypassed the regulator must have a supply */ if (!rdev->supply) { rdev_err(rdev, "bypassed regulator has no supply!\n"); return -EPROBE_DEFER; } return regulator_get_voltage_rdev(rdev->supply->rdev); } } if (rdev->desc->ops->get_voltage_sel) { sel = rdev->desc->ops->get_voltage_sel(rdev); if (sel < 0) return sel; ret = rdev->desc->ops->list_voltage(rdev, sel); } else if (rdev->desc->ops->get_voltage) { ret = rdev->desc->ops->get_voltage(rdev); } else if (rdev->desc->ops->list_voltage) { ret = rdev->desc->ops->list_voltage(rdev, 0); } else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) { ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = regulator_get_voltage_rdev(rdev->supply->rdev); } else if (rdev->supply_name) { return -EPROBE_DEFER; } else { return -EINVAL; } if (ret < 0) return ret; return ret - rdev->constraints->uV_offset; } EXPORT_SYMBOL_GPL(regulator_get_voltage_rdev); /** * regulator_get_voltage - get regulator output voltage * @regulator: regulator source * * Return: Current regulator voltage in uV, or a negative error number on failure. * * NOTE: If the regulator is disabled it will return the voltage value. This * function should not be used to determine regulator state. */ int regulator_get_voltage(struct regulator *regulator) { struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_get_voltage_rdev(regulator->rdev); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_get_voltage); /** * regulator_set_current_limit - set regulator output current limit * @regulator: regulator source * @min_uA: Minimum supported current in uA * @max_uA: Maximum supported current in uA * * Sets current sink to the desired output current. This can be set during * any regulator state. IOW, regulator can be disabled or enabled. * * If the regulator is enabled then the current will change to the new value * immediately otherwise if the regulator is disabled the regulator will * output at the new current when enabled. * * NOTE: Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_current_limit(struct regulator *regulator, int min_uA, int max_uA) { struct regulator_dev *rdev = regulator->rdev; int ret; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_current_limit) { ret = -EINVAL; goto out; } /* constraints check */ ret = regulator_check_current_limit(rdev, &min_uA, &max_uA); if (ret < 0) goto out; ret = rdev->desc->ops->set_current_limit(rdev, min_uA, max_uA); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_current_limit); static int _regulator_get_current_limit_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_current_limit) return -EINVAL; return rdev->desc->ops->get_current_limit(rdev); } static int _regulator_get_current_limit(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_current_limit_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * regulator_get_current_limit - get regulator output current * @regulator: regulator source * * Return: Current supplied by the specified current sink in uA, * or a negative error number on failure. * * NOTE: If the regulator is disabled it will return the current value. This * function should not be used to determine regulator state. */ int regulator_get_current_limit(struct regulator *regulator) { return _regulator_get_current_limit(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_current_limit); /** * regulator_get_unclaimed_power_budget - get regulator unclaimed power budget * @regulator: regulator source * * Return: Unclaimed power budget of the regulator in mW. */ int regulator_get_unclaimed_power_budget(struct regulator *regulator) { return regulator->rdev->constraints->pw_budget_mW - regulator->rdev->pw_requested_mW; } EXPORT_SYMBOL_GPL(regulator_get_unclaimed_power_budget); /** * regulator_request_power_budget - request power budget on a regulator * @regulator: regulator source * @pw_req: Power requested * * Return: 0 on success or a negative error number on failure. */ int regulator_request_power_budget(struct regulator *regulator, unsigned int pw_req) { struct regulator_dev *rdev = regulator->rdev; int ret = 0, pw_tot_req; regulator_lock(rdev); if (rdev->supply) { ret = regulator_request_power_budget(rdev->supply, pw_req); if (ret < 0) goto out; } pw_tot_req = rdev->pw_requested_mW + pw_req; if (pw_tot_req > rdev->constraints->pw_budget_mW) { rdev_warn(rdev, "power requested %d mW out of budget %d mW", pw_req, rdev->constraints->pw_budget_mW - rdev->pw_requested_mW); regulator_notifier_call_chain(rdev, REGULATOR_EVENT_OVER_CURRENT_WARN, NULL); ret = -ERANGE; goto out; } rdev->pw_requested_mW = pw_tot_req; out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_request_power_budget); /** * regulator_free_power_budget - free power budget on a regulator * @regulator: regulator source * @pw: Power to be released. * * Return: Power budget of the regulator in mW. */ void regulator_free_power_budget(struct regulator *regulator, unsigned int pw) { struct regulator_dev *rdev = regulator->rdev; int pw_tot_req; regulator_lock(rdev); if (rdev->supply) regulator_free_power_budget(rdev->supply, pw); pw_tot_req = rdev->pw_requested_mW - pw; if (pw_tot_req >= 0) rdev->pw_requested_mW = pw_tot_req; else rdev_warn(rdev, "too much power freed %d mW (already requested %d mW)", pw, rdev->pw_requested_mW); regulator_unlock(rdev); } EXPORT_SYMBOL_GPL(regulator_free_power_budget); /** * regulator_set_mode - set regulator operating mode * @regulator: regulator source * @mode: operating mode - one of the REGULATOR_MODE constants * * Set regulator operating mode to increase regulator efficiency or improve * regulation performance. * * NOTE: Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_mode(struct regulator *regulator, unsigned int mode) { struct regulator_dev *rdev = regulator->rdev; int ret; int regulator_curr_mode; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_mode) { ret = -EINVAL; goto out; } /* return if the same mode is requested */ if (rdev->desc->ops->get_mode) { regulator_curr_mode = rdev->desc->ops->get_mode(rdev); if (regulator_curr_mode == mode) { ret = 0; goto out; } } /* constraints check */ ret = regulator_mode_constrain(rdev, &mode); if (ret < 0) goto out; ret = rdev->desc->ops->set_mode(rdev, mode); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_mode); static unsigned int _regulator_get_mode_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_mode) return -EINVAL; return rdev->desc->ops->get_mode(rdev); } static unsigned int _regulator_get_mode(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_mode_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * regulator_get_mode - get regulator operating mode * @regulator: regulator source * * Get the current regulator operating mode. * * Return: Current operating mode as %REGULATOR_MODE_* values, * or a negative error number on failure. */ unsigned int regulator_get_mode(struct regulator *regulator) { return _regulator_get_mode(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_mode); static int rdev_get_cached_err_flags(struct regulator_dev *rdev) { int ret = 0; if (rdev->use_cached_err) { spin_lock(&rdev->err_lock); ret = rdev->cached_err; spin_unlock(&rdev->err_lock); } return ret; } static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags) { int cached_flags, ret = 0; regulator_lock(rdev); cached_flags = rdev_get_cached_err_flags(rdev); if (rdev->desc->ops->get_error_flags) ret = rdev->desc->ops->get_error_flags(rdev, flags); else if (!rdev->use_cached_err) ret = -EINVAL; *flags |= cached_flags; regulator_unlock(rdev); return ret; } /** * regulator_get_error_flags - get regulator error information * @regulator: regulator source * @flags: pointer to store error flags * * Get the current regulator error information. * * Return: 0 on success or a negative error number on failure. */ int regulator_get_error_flags(struct regulator *regulator, unsigned int *flags) { return _regulator_get_error_flags(regulator->rdev, flags); } EXPORT_SYMBOL_GPL(regulator_get_error_flags); /** * regulator_set_load - set regulator load * @regulator: regulator source * @uA_load: load current * * Notifies the regulator core of a new device load. This is then used by * DRMS (if enabled by constraints) to set the most efficient regulator * operating mode for the new regulator loading. * * Consumer devices notify their supply regulator of the maximum power * they will require (can be taken from device datasheet in the power * consumption tables) when they change operational status and hence power * state. Examples of operational state changes that can affect power * consumption are :- * * o Device is opened / closed. * o Device I/O is about to begin or has just finished. * o Device is idling in between work. * * This information is also exported via sysfs to userspace. * * DRMS will sum the total requested load on the regulator and change * to the most efficient operating mode if platform constraints allow. * * NOTE: when a regulator consumer requests to have a regulator * disabled then any load that consumer requested no longer counts * toward the total requested load. If the regulator is re-enabled * then the previously requested load will start counting again. * * If a regulator is an always-on regulator then an individual consumer's * load will still be removed if that consumer is fully disabled. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_load(struct regulator *regulator, int uA_load) { struct regulator_dev *rdev = regulator->rdev; int old_uA_load; int ret = 0; regulator_lock(rdev); old_uA_load = regulator->uA_load; regulator->uA_load = uA_load; if (regulator->enable_count && old_uA_load != uA_load) { ret = drms_uA_update(rdev); if (ret < 0) regulator->uA_load = old_uA_load; } regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_load); /** * regulator_allow_bypass - allow the regulator to go into bypass mode * * @regulator: Regulator to configure * @enable: enable or disable bypass mode * * Allow the regulator to go into bypass mode if all other consumers * for the regulator also enable bypass mode and the machine * constraints allow this. Bypass mode means that the regulator is * simply passing the input directly to the output with no regulation. * * Return: 0 on success or if changing bypass is not possible, or * a negative error number on failure. */ int regulator_allow_bypass(struct regulator *regulator, bool enable) { struct regulator_dev *rdev = regulator->rdev; const char *name = rdev_get_name(rdev); int ret = 0; if (!rdev->desc->ops->set_bypass) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_BYPASS)) return 0; regulator_lock(rdev); if (enable && !regulator->bypass) { rdev->bypass_count++; if (rdev->bypass_count == rdev->open_count) { trace_regulator_bypass_enable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count--; else trace_regulator_bypass_enable_complete(name); } } else if (!enable && regulator->bypass) { rdev->bypass_count--; if (rdev->bypass_count != rdev->open_count) { trace_regulator_bypass_disable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count++; else trace_regulator_bypass_disable_complete(name); } } if (ret == 0) regulator->bypass = enable; regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_allow_bypass); /** * regulator_register_notifier - register regulator event notifier * @regulator: regulator source * @nb: notifier block * * Register notifier block to receive regulator events. * * Return: 0 on success or a negative error number on failure. */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_register(®ulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_register_notifier); /** * regulator_unregister_notifier - unregister regulator event notifier * @regulator: regulator source * @nb: notifier block * * Unregister regulator event notifier block. * * Return: 0 on success or a negative error number on failure. */ int regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_unregister(®ulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. * Note mutex must be held by caller. */ static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ int ret = blocking_notifier_call_chain(&rdev->notifier, event, data); if (IS_REACHABLE(CONFIG_REGULATOR_NETLINK_EVENTS)) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[32]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } reg_generate_netlink_event(rname, event); } return ret; } int _regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers, enum regulator_get_type get_type) { int i; int ret; for (i = 0; i < num_consumers; i++) consumers[i].consumer = NULL; for (i = 0; i < num_consumers; i++) { consumers[i].consumer = _regulator_get(dev, consumers[i].supply, get_type); if (IS_ERR(consumers[i].consumer)) { ret = dev_err_probe(dev, PTR_ERR(consumers[i].consumer), "Failed to get supply '%s'\n", consumers[i].supply); consumers[i].consumer = NULL; goto err; } if (consumers[i].init_load_uA > 0) { ret = regulator_set_load(consumers[i].consumer, consumers[i].init_load_uA); if (ret) { i++; goto err; } } } return 0; err: while (--i >= 0) regulator_put(consumers[i].consumer); return ret; } /** * regulator_bulk_get - get multiple regulator consumers * * @dev: Device to supply * @num_consumers: Number of consumers to register * @consumers: Configuration of consumers; clients are stored here. * * This helper function allows drivers to get several regulator * consumers in one operation. If any of the regulators cannot be * acquired then any regulators that were allocated will be freed * before returning to the caller. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers) { return _regulator_bulk_get(dev, num_consumers, consumers, NORMAL_GET); } EXPORT_SYMBOL_GPL(regulator_bulk_get); static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) { struct regulator_bulk_data *bulk = data; bulk->ret = regulator_enable(bulk->consumer); } /** * regulator_bulk_enable - enable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to enable multiple regulator * clients in a single API call. If any consumers cannot be enabled * then any others that were enabled will be disabled again prior to * return. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { ASYNC_DOMAIN_EXCLUSIVE(async_domain); int i; int ret = 0; for (i = 0; i < num_consumers; i++) { async_schedule_domain(regulator_bulk_enable_async, &consumers[i], &async_domain); } async_synchronize_full_domain(&async_domain); /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { if (consumers[i].ret != 0) { ret = consumers[i].ret; goto err; } } return 0; err: for (i = 0; i < num_consumers; i++) { if (consumers[i].ret < 0) pr_err("Failed to enable %s: %pe\n", consumers[i].supply, ERR_PTR(consumers[i].ret)); else regulator_disable(consumers[i].consumer); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_enable); /** * regulator_bulk_disable - disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to disable multiple regulator * clients in a single API call. If any consumers cannot be disabled * then any others that were disabled will be enabled again prior to * return. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers) { int i; int ret, r; for (i = num_consumers - 1; i >= 0; --i) { ret = regulator_disable(consumers[i].consumer); if (ret != 0) goto err; } return 0; err: pr_err("Failed to disable %s: %pe\n", consumers[i].supply, ERR_PTR(ret)); for (++i; i < num_consumers; ++i) { r = regulator_enable(consumers[i].consumer); if (r != 0) pr_err("Failed to re-enable %s: %pe\n", consumers[i].supply, ERR_PTR(r)); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_disable); /** * regulator_bulk_force_disable - force disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to forcibly disable multiple regulator * clients in a single API call. * NOTE: This should be used for situations when device damage will * likely occur if the regulators are not disabled (e.g. over temp). * Although regulator_force_disable function call for some consumers can * return error numbers, the function is called for all consumers. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_force_disable(int num_consumers, struct regulator_bulk_data *consumers) { int i; int ret = 0; for (i = 0; i < num_consumers; i++) { consumers[i].ret = regulator_force_disable(consumers[i].consumer); /* Store first error for reporting */ if (consumers[i].ret && !ret) ret = consumers[i].ret; } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_force_disable); /** * regulator_bulk_free - free multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to free multiple regulator * clients in a single API call. */ void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers) { int i; for (i = 0; i < num_consumers; i++) { regulator_put(consumers[i].consumer); consumers[i].consumer = NULL; } } EXPORT_SYMBOL_GPL(regulator_bulk_free); /** * regulator_handle_critical - Handle events for system-critical regulators. * @rdev: The regulator device. * @event: The event being handled. * * This function handles critical events such as under-voltage, over-current, * and unknown errors for regulators deemed system-critical. On detecting such * events, it triggers a hardware protection shutdown with a defined timeout. */ static void regulator_handle_critical(struct regulator_dev *rdev, unsigned long event) { const char *reason = NULL; if (!rdev->constraints->system_critical) return; switch (event) { case REGULATOR_EVENT_UNDER_VOLTAGE: reason = "System critical regulator: voltage drop detected"; break; case REGULATOR_EVENT_OVER_CURRENT: reason = "System critical regulator: over-current detected"; break; case REGULATOR_EVENT_FAIL: reason = "System critical regulator: unknown error"; } if (!reason) return; hw_protection_trigger(reason, rdev->constraints->uv_less_critical_window_ms); } /** * regulator_notifier_call_chain - call regulator event notifier * @rdev: regulator source * @event: notifier block * @data: callback-specific data. * * Called by regulator drivers to notify clients a regulator event has * occurred. * * Return: %NOTIFY_DONE. */ int regulator_notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { regulator_handle_critical(rdev, event); _notifier_call_chain(rdev, event, data); return NOTIFY_DONE; } EXPORT_SYMBOL_GPL(regulator_notifier_call_chain); /** * regulator_mode_to_status - convert a regulator mode into a status * * @mode: Mode to convert * * Convert a regulator mode into a status. * * Return: %REGULATOR_STATUS_* value corresponding to given mode. */ int regulator_mode_to_status(unsigned int mode) { switch (mode) { case REGULATOR_MODE_FAST: return REGULATOR_STATUS_FAST; case REGULATOR_MODE_NORMAL: return REGULATOR_STATUS_NORMAL; case REGULATOR_MODE_IDLE: return REGULATOR_STATUS_IDLE; case REGULATOR_MODE_STANDBY: return REGULATOR_STATUS_STANDBY; default: return REGULATOR_STATUS_UNDEFINED; } } EXPORT_SYMBOL_GPL(regulator_mode_to_status); static struct attribute *regulator_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_num_users.attr, &dev_attr_type.attr, &dev_attr_microvolts.attr, &dev_attr_microamps.attr, &dev_attr_opmode.attr, &dev_attr_state.attr, &dev_attr_status.attr, &dev_attr_bypass.attr, &dev_attr_requested_microamps.attr, &dev_attr_min_microvolts.attr, &dev_attr_max_microvolts.attr, &dev_attr_min_microamps.attr, &dev_attr_max_microamps.attr, &dev_attr_under_voltage.attr, &dev_attr_over_current.attr, &dev_attr_regulation_out.attr, &dev_attr_fail.attr, &dev_attr_over_temp.attr, &dev_attr_under_voltage_warn.attr, &dev_attr_over_current_warn.attr, &dev_attr_over_voltage_warn.attr, &dev_attr_over_temp_warn.attr, &dev_attr_suspend_standby_state.attr, &dev_attr_suspend_mem_state.attr, &dev_attr_suspend_disk_state.attr, &dev_attr_suspend_standby_microvolts.attr, &dev_attr_suspend_mem_microvolts.attr, &dev_attr_suspend_disk_microvolts.attr, &dev_attr_suspend_standby_mode.attr, &dev_attr_suspend_mem_mode.attr, &dev_attr_suspend_disk_mode.attr, &dev_attr_power_budget_milliwatt.attr, &dev_attr_power_requested_milliwatt.attr, NULL }; /* * To avoid cluttering sysfs (and memory) with useless state, only * create attributes that can be meaningfully displayed. */ static umode_t regulator_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct regulator_dev *rdev = dev_to_rdev(dev); const struct regulator_ops *ops = rdev->desc->ops; umode_t mode = attr->mode; /* these three are always present */ if (attr == &dev_attr_name.attr || attr == &dev_attr_num_users.attr || attr == &dev_attr_type.attr) return mode; /* some attributes need specific methods to be displayed */ if (attr == &dev_attr_microvolts.attr) { if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) || (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1)) return mode; return 0; } if (attr == &dev_attr_microamps.attr) return ops->get_current_limit ? mode : 0; if (attr == &dev_attr_opmode.attr) return ops->get_mode ? mode : 0; if (attr == &dev_attr_state.attr) return (rdev->ena_pin || ops->is_enabled) ? mode : 0; if (attr == &dev_attr_status.attr) return ops->get_status ? mode : 0; if (attr == &dev_attr_bypass.attr) return ops->get_bypass ? mode : 0; if (attr == &dev_attr_under_voltage.attr || attr == &dev_attr_over_current.attr || attr == &dev_attr_regulation_out.attr || attr == &dev_attr_fail.attr || attr == &dev_attr_over_temp.attr || attr == &dev_attr_under_voltage_warn.attr || attr == &dev_attr_over_current_warn.attr || attr == &dev_attr_over_voltage_warn.attr || attr == &dev_attr_over_temp_warn.attr) return ops->get_error_flags ? mode : 0; /* constraints need specific supporting methods */ if (attr == &dev_attr_min_microvolts.attr || attr == &dev_attr_max_microvolts.attr) return (ops->set_voltage || ops->set_voltage_sel) ? mode : 0; if (attr == &dev_attr_min_microamps.attr || attr == &dev_attr_max_microamps.attr) return ops->set_current_limit ? mode : 0; if (attr == &dev_attr_suspend_standby_state.attr || attr == &dev_attr_suspend_mem_state.attr || attr == &dev_attr_suspend_disk_state.attr) return mode; if (attr == &dev_attr_suspend_standby_microvolts.attr || attr == &dev_attr_suspend_mem_microvolts.attr || attr == &dev_attr_suspend_disk_microvolts.attr) return ops->set_suspend_voltage ? mode : 0; if (attr == &dev_attr_suspend_standby_mode.attr || attr == &dev_attr_suspend_mem_mode.attr || attr == &dev_attr_suspend_disk_mode.attr) return ops->set_suspend_mode ? mode : 0; if (attr == &dev_attr_power_budget_milliwatt.attr || attr == &dev_attr_power_requested_milliwatt.attr) return rdev->constraints->pw_budget_mW != INT_MAX ? mode : 0; return mode; } static const struct attribute_group regulator_dev_group = { .attrs = regulator_dev_attrs, .is_visible = regulator_attr_is_visible, }; static const struct attribute_group *regulator_dev_groups[] = { ®ulator_dev_group, NULL }; static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); } static void rdev_init_debugfs(struct regulator_dev *rdev) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[NAME_MAX]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } rdev->debugfs = debugfs_create_dir(rname, debugfs_root); if (IS_ERR(rdev->debugfs)) rdev_dbg(rdev, "Failed to create debugfs directory\n"); debugfs_create_u32("use_count", 0444, rdev->debugfs, &rdev->use_count); debugfs_create_u32("open_count", 0444, rdev->debugfs, &rdev->open_count); debugfs_create_u32("bypass_count", 0444, rdev->debugfs, &rdev->bypass_count); } static int regulator_register_resolve_supply(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); if (regulator_resolve_supply(rdev)) rdev_dbg(rdev, "unable to resolve supply\n"); return 0; } int regulator_coupler_register(struct regulator_coupler *coupler) { mutex_lock(®ulator_list_mutex); list_add_tail(&coupler->list, ®ulator_coupler_list); mutex_unlock(®ulator_list_mutex); return 0; } static struct regulator_coupler * regulator_find_coupler(struct regulator_dev *rdev) { struct regulator_coupler *coupler; int err; /* * Note that regulators are appended to the list and the generic * coupler is registered first, hence it will be attached at last * if nobody cared. */ list_for_each_entry_reverse(coupler, ®ulator_coupler_list, list) { err = coupler->attach_regulator(coupler, rdev); if (!err) { if (!coupler->balance_voltage && rdev->coupling_desc.n_coupled > 2) goto err_unsupported; return coupler; } if (err < 0) return ERR_PTR(err); if (err == 1) continue; break; } return ERR_PTR(-EINVAL); err_unsupported: if (coupler->detach_regulator) coupler->detach_regulator(coupler, rdev); rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return ERR_PTR(-EPERM); } static void regulator_resolve_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *c_desc = &rdev->coupling_desc; int n_coupled = c_desc->n_coupled; struct regulator_dev *c_rdev; int i; for (i = 1; i < n_coupled; i++) { /* already resolved */ if (c_desc->coupled_rdevs[i]) continue; c_rdev = of_parse_coupled_regulator(rdev, i - 1); if (!c_rdev) continue; if (c_rdev->coupling_desc.coupler != coupler) { rdev_err(rdev, "coupler mismatch with %s\n", rdev_get_name(c_rdev)); return; } c_desc->coupled_rdevs[i] = c_rdev; c_desc->n_resolved++; regulator_resolve_coupling(c_rdev); } } static void regulator_remove_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *__c_desc, *c_desc = &rdev->coupling_desc; struct regulator_dev *__c_rdev, *c_rdev; unsigned int __n_coupled, n_coupled; int i, k; int err; n_coupled = c_desc->n_coupled; for (i = 1; i < n_coupled; i++) { c_rdev = c_desc->coupled_rdevs[i]; if (!c_rdev) continue; regulator_lock(c_rdev); __c_desc = &c_rdev->coupling_desc; __n_coupled = __c_desc->n_coupled; for (k = 1; k < __n_coupled; k++) { __c_rdev = __c_desc->coupled_rdevs[k]; if (__c_rdev == rdev) { __c_desc->coupled_rdevs[k] = NULL; __c_desc->n_resolved--; break; } } regulator_unlock(c_rdev); c_desc->coupled_rdevs[i] = NULL; c_desc->n_resolved--; } if (coupler && coupler->detach_regulator) { err = coupler->detach_regulator(coupler, rdev); if (err) rdev_err(rdev, "failed to detach from coupler: %pe\n", ERR_PTR(err)); } rdev->coupling_desc.n_coupled = 0; kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } static int regulator_init_coupling(struct regulator_dev *rdev) { struct regulator_dev **coupled; int err, n_phandles; if (!IS_ENABLED(CONFIG_OF)) n_phandles = 0; else n_phandles = of_get_n_coupled(rdev); coupled = kcalloc(n_phandles + 1, sizeof(*coupled), GFP_KERNEL); if (!coupled) return -ENOMEM; rdev->coupling_desc.coupled_rdevs = coupled; /* * Every regulator should always have coupling descriptor filled with * at least pointer to itself. */ rdev->coupling_desc.coupled_rdevs[0] = rdev; rdev->coupling_desc.n_coupled = n_phandles + 1; rdev->coupling_desc.n_resolved++; /* regulator isn't coupled */ if (n_phandles == 0) return 0; if (!of_check_coupling_data(rdev)) return -EPERM; mutex_lock(®ulator_list_mutex); rdev->coupling_desc.coupler = regulator_find_coupler(rdev); mutex_unlock(®ulator_list_mutex); if (IS_ERR(rdev->coupling_desc.coupler)) { err = PTR_ERR(rdev->coupling_desc.coupler); rdev_err(rdev, "failed to get coupler: %pe\n", ERR_PTR(err)); return err; } return 0; } static int generic_coupler_attach(struct regulator_coupler *coupler, struct regulator_dev *rdev) { if (rdev->coupling_desc.n_coupled > 2) { rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return -EPERM; } if (!rdev->constraints->always_on) { rdev_err(rdev, "Coupling of a non always-on regulator is unimplemented\n"); return -ENOTSUPP; } return 0; } static struct regulator_coupler generic_regulator_coupler = { .attach_regulator = generic_coupler_attach, }; /** * regulator_register - register regulator * @dev: the device that drive the regulator * @regulator_desc: regulator to register * @cfg: runtime configuration for regulator * * Called by regulator drivers to register a regulator. * * Return: Pointer to a valid &struct regulator_dev on success or * an ERR_PTR() encoded negative error number on failure. */ struct regulator_dev * regulator_register(struct device *dev, const struct regulator_desc *regulator_desc, const struct regulator_config *cfg) { const struct regulator_init_data *init_data; struct regulator_config *config = NULL; static atomic_t regulator_no = ATOMIC_INIT(-1); struct regulator_dev *rdev; bool dangling_cfg_gpiod = false; bool dangling_of_gpiod = false; int ret, i; bool resolved_early = false; if (cfg == NULL) return ERR_PTR(-EINVAL); if (cfg->ena_gpiod) dangling_cfg_gpiod = true; if (regulator_desc == NULL) { ret = -EINVAL; goto rinse; } WARN_ON(!dev || !cfg->dev); if (regulator_desc->name == NULL || regulator_desc->ops == NULL) { ret = -EINVAL; goto rinse; } if (regulator_desc->type != REGULATOR_VOLTAGE && regulator_desc->type != REGULATOR_CURRENT) { ret = -EINVAL; goto rinse; } /* Only one of each should be implemented */ WARN_ON(regulator_desc->ops->get_voltage && regulator_desc->ops->get_voltage_sel); WARN_ON(regulator_desc->ops->set_voltage && regulator_desc->ops->set_voltage_sel); /* If we're using selectors we must implement list_voltage. */ if (regulator_desc->ops->get_voltage_sel && !regulator_desc->ops->list_voltage) { ret = -EINVAL; goto rinse; } if (regulator_desc->ops->set_voltage_sel && !regulator_desc->ops->list_voltage) { ret = -EINVAL; goto rinse; } rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL); if (rdev == NULL) { ret = -ENOMEM; goto rinse; } device_initialize(&rdev->dev); dev_set_drvdata(&rdev->dev, rdev); rdev->dev.class = ®ulator_class; spin_lock_init(&rdev->err_lock); /* * Duplicate the config so the driver could override it after * parsing init data. */ config = kmemdup(cfg, sizeof(*cfg), GFP_KERNEL); if (config == NULL) { ret = -ENOMEM; goto clean; } /* * DT may override the config->init_data provided if the platform * needs to do so. If so, config->init_data is completely ignored. */ init_data = regulator_of_get_init_data(dev, regulator_desc, config, &rdev->dev.of_node); /* * Sometimes not all resources are probed already so we need to take * that into account. This happens most the time if the ena_gpiod comes * from a gpio extender or something else. */ if (PTR_ERR(init_data) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; goto clean; } /* * We need to keep track of any GPIO descriptor coming from the * device tree until we have handled it over to the core. If the * config that was passed in to this function DOES NOT contain * a descriptor, and the config after this call DOES contain * a descriptor, we definitely got one from parsing the device * tree. */ if (!cfg->ena_gpiod && config->ena_gpiod) dangling_of_gpiod = true; if (!init_data) { init_data = config->init_data; rdev->dev.of_node = of_node_get(config->of_node); } ww_mutex_init(&rdev->mutex, ®ulator_ww_class); rdev->reg_data = config->driver_data; rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; if (config->regmap) rdev->regmap = config->regmap; else if (dev_get_regmap(dev, NULL)) rdev->regmap = dev_get_regmap(dev, NULL); else if (dev->parent) rdev->regmap = dev_get_regmap(dev->parent, NULL); INIT_LIST_HEAD(&rdev->consumer_list); INIT_LIST_HEAD(&rdev->list); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); INIT_DELAYED_WORK(&rdev->disable_work, regulator_disable_work); if (init_data && init_data->supply_regulator) rdev->supply_name = init_data->supply_regulator; else if (regulator_desc->supply_name) rdev->supply_name = regulator_desc->supply_name; /* register with sysfs */ rdev->dev.parent = config->dev; dev_set_name(&rdev->dev, "regulator.%lu", (unsigned long) atomic_inc_return(®ulator_no)); /* set regulator constraints */ if (init_data) rdev->constraints = kmemdup(&init_data->constraints, sizeof(*rdev->constraints), GFP_KERNEL); else rdev->constraints = kzalloc(sizeof(*rdev->constraints), GFP_KERNEL); if (!rdev->constraints) { ret = -ENOMEM; goto wash; } if (regulator_desc->init_cb) { ret = regulator_desc->init_cb(rdev, config); if (ret < 0) goto wash; } if ((rdev->supply_name && !rdev->supply) && (rdev->constraints->always_on || rdev->constraints->boot_on)) { ret = regulator_resolve_supply(rdev); if (ret) rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); resolved_early = true; } if (config->ena_gpiod) { ret = regulator_ena_gpio_request(rdev, config); if (ret != 0) { rdev_err(rdev, "Failed to request enable GPIO: %pe\n", ERR_PTR(ret)); goto wash; } /* The regulator core took over the GPIO descriptor */ dangling_cfg_gpiod = false; dangling_of_gpiod = false; } ret = set_machine_constraints(rdev); if (ret == -EPROBE_DEFER && !resolved_early) { /* Regulator might be in bypass mode and so needs its supply * to set the constraints */ /* FIXME: this currently triggers a chicken-and-egg problem * when creating -SUPPLY symlink in sysfs to a regulator * that is just being created */ rdev_dbg(rdev, "will resolve supply early: %s\n", rdev->supply_name); ret = regulator_resolve_supply(rdev); if (!ret) ret = set_machine_constraints(rdev); else rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); } if (ret < 0) goto wash; ret = regulator_init_coupling(rdev); if (ret < 0) goto wash; /* add consumers devices */ if (init_data) { for (i = 0; i < init_data->num_consumer_supplies; i++) { ret = set_consumer_device_supply(rdev, init_data->consumer_supplies[i].dev_name, init_data->consumer_supplies[i].supply); if (ret < 0) { dev_err(dev, "Failed to set supply %s\n", init_data->consumer_supplies[i].supply); goto unset_supplies; } } } if (!rdev->desc->ops->get_voltage && !rdev->desc->ops->list_voltage && !rdev->desc->fixed_uV) rdev->is_switch = true; ret = device_add(&rdev->dev); if (ret != 0) goto unset_supplies; rdev_init_debugfs(rdev); /* try to resolve regulators coupling since a new one was registered */ mutex_lock(®ulator_list_mutex); regulator_resolve_coupling(rdev); mutex_unlock(®ulator_list_mutex); /* try to resolve regulators supply since a new one was registered */ class_for_each_device(®ulator_class, NULL, NULL, regulator_register_resolve_supply); kfree(config); return rdev; unset_supplies: mutex_lock(®ulator_list_mutex); unset_regulator_supplies(rdev); regulator_remove_coupling(rdev); mutex_unlock(®ulator_list_mutex); wash: regulator_put(rdev->supply); kfree(rdev->coupling_desc.coupled_rdevs); mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); kfree(config); put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(regulator_register); /** * regulator_unregister - unregister regulator * @rdev: regulator to unregister * * Called by regulator drivers to unregister a regulator. */ void regulator_unregister(struct regulator_dev *rdev) { if (rdev == NULL) return; if (rdev->supply) { while (rdev->use_count--) regulator_disable(rdev->supply); regulator_put(rdev->supply); } flush_work(&rdev->disable_work.work); mutex_lock(®ulator_list_mutex); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); list_del(&rdev->list); regulator_ena_gpio_free(rdev); device_unregister(&rdev->dev); mutex_unlock(®ulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_unregister); #ifdef CONFIG_SUSPEND /** * regulator_suspend - prepare regulators for system wide suspend * @dev: ``&struct device`` pointer that is passed to _regulator_suspend() * * Configure each regulator with it's suspend operating parameters for state. * * Return: 0 on success or a negative error number on failure. */ static int regulator_suspend(struct device *dev) { struct regulator_dev *rdev = dev_to_rdev(dev); suspend_state_t state = pm_suspend_target_state; int ret; const struct regulator_state *rstate; rstate = regulator_get_suspend_state_check(rdev, state); if (!rstate) return 0; regulator_lock(rdev); ret = __suspend_set_state(rdev, rstate); regulator_unlock(rdev); return ret; } static int regulator_resume(struct device *dev) { suspend_state_t state = pm_suspend_target_state; struct regulator_dev *rdev = dev_to_rdev(dev); struct regulator_state *rstate; int ret = 0; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return 0; /* Avoid grabbing the lock if we don't need to */ if (!rdev->desc->ops->resume) return 0; regulator_lock(rdev); if (rstate->enabled == ENABLE_IN_SUSPEND || rstate->enabled == DISABLE_IN_SUSPEND) ret = rdev->desc->ops->resume(rdev); regulator_unlock(rdev); return ret; } #else /* !CONFIG_SUSPEND */ #define regulator_suspend NULL #define regulator_resume NULL #endif /* !CONFIG_SUSPEND */ #ifdef CONFIG_PM static const struct dev_pm_ops __maybe_unused regulator_pm_ops = { .suspend = regulator_suspend, .resume = regulator_resume, }; #endif const struct class regulator_class = { .name = "regulator", .dev_release = regulator_dev_release, .dev_groups = regulator_dev_groups, #ifdef CONFIG_PM .pm = ®ulator_pm_ops, #endif }; /** * regulator_has_full_constraints - the system has fully specified constraints * * Calling this function will cause the regulator API to disable all * regulators which have a zero use count and don't have an always_on * constraint in a late_initcall. * * The intention is that this will become the default behaviour in a * future kernel release so users are encouraged to use this facility * now. */ void regulator_has_full_constraints(void) { has_full_constraints = 1; } EXPORT_SYMBOL_GPL(regulator_has_full_constraints); /** * rdev_get_drvdata - get rdev regulator driver data * @rdev: regulator * * Get rdev regulator driver private data. This call can be used in the * regulator driver context. * * Return: Pointer to regulator driver private data. */ void *rdev_get_drvdata(struct regulator_dev *rdev) { return rdev->reg_data; } EXPORT_SYMBOL_GPL(rdev_get_drvdata); /** * regulator_get_drvdata - get regulator driver data * @regulator: regulator * * Get regulator driver private data. This call can be used in the consumer * driver context when non API regulator specific functions need to be called. * * Return: Pointer to regulator driver private data. */ void *regulator_get_drvdata(struct regulator *regulator) { return regulator->rdev->reg_data; } EXPORT_SYMBOL_GPL(regulator_get_drvdata); /** * regulator_set_drvdata - set regulator driver data * @regulator: regulator * @data: data */ void regulator_set_drvdata(struct regulator *regulator, void *data) { regulator->rdev->reg_data = data; } EXPORT_SYMBOL_GPL(regulator_set_drvdata); /** * rdev_get_id - get regulator ID * @rdev: regulator * * Return: Regulator ID for @rdev. */ int rdev_get_id(struct regulator_dev *rdev) { return rdev->desc->id; } EXPORT_SYMBOL_GPL(rdev_get_id); struct device *rdev_get_dev(struct regulator_dev *rdev) { return &rdev->dev; } EXPORT_SYMBOL_GPL(rdev_get_dev); struct regmap *rdev_get_regmap(struct regulator_dev *rdev) { return rdev->regmap; } EXPORT_SYMBOL_GPL(rdev_get_regmap); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data) { return reg_init_data->driver_data; } EXPORT_SYMBOL_GPL(regulator_get_init_drvdata); #ifdef CONFIG_DEBUG_FS static int supply_map_show(struct seq_file *sf, void *data) { struct regulator_map *map; list_for_each_entry(map, ®ulator_map_list, list) { seq_printf(sf, "%s -> %s.%s\n", rdev_get_name(map->regulator), map->dev_name, map->supply); } return 0; } DEFINE_SHOW_ATTRIBUTE(supply_map); struct summary_data { struct seq_file *s; struct regulator_dev *parent; int level; }; static void regulator_summary_show_subtree(struct seq_file *s, struct regulator_dev *rdev, int level); static int regulator_summary_show_children(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_data *summary_data = data; if (rdev->supply && rdev->supply->rdev == summary_data->parent) regulator_summary_show_subtree(summary_data->s, rdev, summary_data->level + 1); return 0; } static void regulator_summary_show_subtree(struct seq_file *s, struct regulator_dev *rdev, int level) { struct regulation_constraints *c; struct regulator *consumer; struct summary_data summary_data; unsigned int opmode; if (!rdev) return; opmode = _regulator_get_mode_unlocked(rdev); seq_printf(s, "%*s%-*s %3d %4d %6d %7s ", level * 3 + 1, "", 30 - level * 3, rdev_get_name(rdev), rdev->use_count, rdev->open_count, rdev->bypass_count, regulator_opmode_to_str(opmode)); seq_printf(s, "%5dmV ", regulator_get_voltage_rdev(rdev) / 1000); seq_printf(s, "%5dmA ", _regulator_get_current_limit_unlocked(rdev) / 1000); c = rdev->constraints; if (c) { switch (rdev->desc->type) { case REGULATOR_VOLTAGE: seq_printf(s, "%5dmV %5dmV ", c->min_uV / 1000, c->max_uV / 1000); break; case REGULATOR_CURRENT: seq_printf(s, "%5dmA %5dmA ", c->min_uA / 1000, c->max_uA / 1000); break; } } seq_puts(s, "\n"); list_for_each_entry(consumer, &rdev->consumer_list, list) { if (consumer->dev && consumer->dev->class == ®ulator_class) continue; seq_printf(s, "%*s%-*s ", (level + 1) * 3 + 1, "", 30 - (level + 1) * 3, consumer->supply_name ? consumer->supply_name : consumer->dev ? dev_name(consumer->dev) : "deviceless"); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: seq_printf(s, "%3d %33dmA%c%5dmV %5dmV", consumer->enable_count, consumer->uA_load / 1000, consumer->uA_load && !consumer->enable_count ? '*' : ' ', consumer->voltage[PM_SUSPEND_ON].min_uV / 1000, consumer->voltage[PM_SUSPEND_ON].max_uV / 1000); break; case REGULATOR_CURRENT: break; } seq_puts(s, "\n"); } summary_data.s = s; summary_data.level = level; summary_data.parent = rdev; class_for_each_device(®ulator_class, NULL, &summary_data, regulator_summary_show_children); } struct summary_lock_data { struct ww_acquire_ctx *ww_ctx; struct regulator_dev **new_contended_rdev; struct regulator_dev **old_contended_rdev; }; static int regulator_summary_lock_one(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_lock_data *lock_data = data; int ret = 0; if (rdev != *lock_data->old_contended_rdev) { ret = regulator_lock_nested(rdev, lock_data->ww_ctx); if (ret == -EDEADLK) *lock_data->new_contended_rdev = rdev; else WARN_ON_ONCE(ret); } else { *lock_data->old_contended_rdev = NULL; } return ret; } static int regulator_summary_unlock_one(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_lock_data *lock_data = data; if (lock_data) { if (rdev == *lock_data->new_contended_rdev) return -EDEADLK; } regulator_unlock(rdev); return 0; } static int regulator_summary_lock_all(struct ww_acquire_ctx *ww_ctx, struct regulator_dev **new_contended_rdev, struct regulator_dev **old_contended_rdev) { struct summary_lock_data lock_data; int ret; lock_data.ww_ctx = ww_ctx; lock_data.new_contended_rdev = new_contended_rdev; lock_data.old_contended_rdev = old_contended_rdev; ret = class_for_each_device(®ulator_class, NULL, &lock_data, regulator_summary_lock_one); if (ret) class_for_each_device(®ulator_class, NULL, &lock_data, regulator_summary_unlock_one); return ret; } static void regulator_summary_lock(struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *new_contended_rdev = NULL; struct regulator_dev *old_contended_rdev = NULL; int err; mutex_lock(®ulator_list_mutex); ww_acquire_init(ww_ctx, ®ulator_ww_class); do { if (new_contended_rdev) { ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); old_contended_rdev = new_contended_rdev; old_contended_rdev->ref_cnt++; old_contended_rdev->mutex_owner = current; } err = regulator_summary_lock_all(ww_ctx, &new_contended_rdev, &old_contended_rdev); if (old_contended_rdev) regulator_unlock(old_contended_rdev); } while (err == -EDEADLK); ww_acquire_done(ww_ctx); } static void regulator_summary_unlock(struct ww_acquire_ctx *ww_ctx) { class_for_each_device(®ulator_class, NULL, NULL, regulator_summary_unlock_one); ww_acquire_fini(ww_ctx); mutex_unlock(®ulator_list_mutex); } static int regulator_summary_show_roots(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct seq_file *s = data; if (!rdev->supply) regulator_summary_show_subtree(s, rdev, 0); return 0; } static int regulator_summary_show(struct seq_file *s, void *data) { struct ww_acquire_ctx ww_ctx; seq_puts(s, " regulator use open bypass opmode voltage current min max\n"); seq_puts(s, "---------------------------------------------------------------------------------------\n"); regulator_summary_lock(&ww_ctx); class_for_each_device(®ulator_class, NULL, s, regulator_summary_show_roots); regulator_summary_unlock(&ww_ctx); return 0; } DEFINE_SHOW_ATTRIBUTE(regulator_summary); #endif /* CONFIG_DEBUG_FS */ static int __init regulator_init(void) { int ret; ret = class_register(®ulator_class); debugfs_root = debugfs_create_dir("regulator", NULL); if (IS_ERR(debugfs_root)) pr_debug("regulator: Failed to create debugfs directory\n"); #ifdef CONFIG_DEBUG_FS debugfs_create_file("supply_map", 0444, debugfs_root, NULL, &supply_map_fops); debugfs_create_file("regulator_summary", 0444, debugfs_root, NULL, ®ulator_summary_fops); #endif regulator_dummy_init(); regulator_coupler_register(&generic_regulator_coupler); return ret; } /* init early to allow our consumers to complete system booting */ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct regulation_constraints *c = rdev->constraints; int ret; if (c && c->always_on) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) return 0; regulator_lock(rdev); if (rdev->use_count) goto unlock; /* If reading the status failed, assume that it's off. */ if (_regulator_is_enabled(rdev) <= 0) goto unlock; if (have_full_constraints()) { /* We log since this may kill the system if it goes * wrong. */ rdev_info(rdev, "disabling\n"); ret = _regulator_do_disable(rdev); if (ret != 0) rdev_err(rdev, "couldn't disable: %pe\n", ERR_PTR(ret)); } else { /* The intention is that in future we will * assume that full constraints are provided * so warn even if we aren't going to do * anything here. */ rdev_warn(rdev, "incomplete constraints, leaving on\n"); } unlock: regulator_unlock(rdev); return 0; } static bool regulator_ignore_unused; static int __init regulator_ignore_unused_setup(char *__unused) { regulator_ignore_unused = true; return 1; } __setup("regulator_ignore_unused", regulator_ignore_unused_setup); static void regulator_init_complete_work_function(struct work_struct *work) { /* * Regulators may had failed to resolve their input supplies * when were registered, either because the input supply was * not registered yet or because its parent device was not * bound yet. So attempt to resolve the input supplies for * pending regulators before trying to disable unused ones. */ class_for_each_device(®ulator_class, NULL, NULL, regulator_register_resolve_supply); /* * For debugging purposes, it may be useful to prevent unused * regulators from being disabled. */ if (regulator_ignore_unused) { pr_warn("regulator: Not disabling unused regulators\n"); return; } /* If we have a full configuration then disable any regulators * we have permission to change the status for and which are * not in use or always_on. This is effectively the default * for DT and ACPI as they have full constraints. */ class_for_each_device(®ulator_class, NULL, NULL, regulator_late_cleanup); } static DECLARE_DELAYED_WORK(regulator_init_complete_work, regulator_init_complete_work_function); static int __init regulator_init_complete(void) { /* * Since DT doesn't provide an idiomatic mechanism for * enabling full constraints and since it's much more natural * with DT to provide them just assume that a DT enabled * system has full constraints. */ if (of_have_populated_dt()) has_full_constraints = true; /* * We punt completion for an arbitrary amount of time since * systems like distros will load many drivers from userspace * so consumers might not always be ready yet, this is * particularly an issue with laptops where this might bounce * the display off then on. Ideally we'd get a notification * from userspace when this happens but we don't so just wait * a bit and hope we waited long enough. It'd be better if * we'd only do this on systems that need it, and a kernel * command line option might be useful. */ schedule_delayed_work(®ulator_init_complete_work, msecs_to_jiffies(30000)); return 0; } late_initcall_sync(regulator_init_complete); |
| 12 2 10 3 9 12 3 2 2 3 2 3 3 3 3 3 11 6 9 3 1 10 2 3 10 1 1 3 3 6 6 6 9 2 1 3 5 2 2 2 10 2 8 2 2 15 2 8 7 9 2 17 12 5 17 3 2 10 11 5 6 11 10 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 | // SPDX-License-Identifier: GPL-2.0 #include <linux/blkdev.h> #include <linux/iversion.h> #include "ctree.h" #include "fs.h" #include "messages.h" #include "compression.h" #include "delalloc-space.h" #include "disk-io.h" #include "reflink.h" #include "transaction.h" #include "subpage.h" #include "accessors.h" #include "file-item.h" #include "file.h" #include "super.h" #define BTRFS_MAX_DEDUPE_LEN SZ_16M static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, const u64 destoff, const u64 olen, bool no_time_update) { int ret; inode_inc_iversion(inode); if (!no_time_update) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); } /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. */ if (endoff > destoff + olen) endoff = destoff + olen; if (endoff > inode->i_size) { i_size_write(inode, endoff); btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); } ret = btrfs_update_inode(trans, BTRFS_I(inode)); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } return btrfs_end_transaction(trans); } static int copy_inline_to_page(struct btrfs_inode *inode, const u64 file_offset, char *inline_data, const u64 size, const u64 datal, const u8 comp_type) { struct btrfs_fs_info *fs_info = inode->root->fs_info; const u32 block_size = fs_info->sectorsize; const u64 range_end = file_offset + block_size - 1; const size_t inline_size = size - btrfs_file_extent_calc_inline_size(0); char *data_start = inline_data + btrfs_file_extent_calc_inline_size(0); struct extent_changeset *data_reserved = NULL; struct folio *folio = NULL; struct address_space *mapping = inode->vfs_inode.i_mapping; int ret; ASSERT(IS_ALIGNED(file_offset, block_size)); /* * We have flushed and locked the ranges of the source and destination * inodes, we also have locked the inodes, so we are safe to do a * reservation here. Also we must not do the reservation while holding * a transaction open, otherwise we would deadlock. */ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, file_offset, block_size); if (ret) goto out; folio = __filemap_get_folio(mapping, file_offset >> PAGE_SHIFT, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, btrfs_alloc_write_mask(mapping)); if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto out_unlock; } ret = set_folio_extent_mapped(folio); if (ret < 0) goto out_unlock; btrfs_clear_extent_bit(&inode->io_tree, file_offset, range_end, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, NULL); ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL); if (ret) goto out_unlock; /* * After dirtying the page our caller will need to start a transaction, * and if we are low on metadata free space, that can cause flushing of * delalloc for all inodes in order to get metadata space released. * However we are holding the range locked for the whole duration of * the clone/dedupe operation, so we may deadlock if that happens and no * other task releases enough space. So mark this inode as not being * possible to flush to avoid such deadlock. We will clear that flag * when we finish cloning all extents, since a transaction is started * after finding each extent to clone. */ set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); if (comp_type == BTRFS_COMPRESS_NONE) { memcpy_to_folio(folio, offset_in_folio(folio, file_offset), data_start, datal); } else { ret = btrfs_decompress(comp_type, data_start, folio, offset_in_folio(folio, file_offset), inline_size, datal); if (ret) goto out_unlock; flush_dcache_folio(folio); } /* * If our inline data is smaller then the block/page size, then the * remaining of the block/page is equivalent to zeroes. We had something * like the following done: * * $ xfs_io -f -c "pwrite -S 0xab 0 500" file * $ sync # (or fsync) * $ xfs_io -c "falloc 0 4K" file * $ xfs_io -c "pwrite -S 0xcd 4K 4K" * * So what's in the range [500, 4095] corresponds to zeroes. */ if (datal < block_size) folio_zero_range(folio, datal, block_size - datal); btrfs_folio_set_uptodate(fs_info, folio, file_offset, block_size); btrfs_folio_clear_checked(fs_info, folio, file_offset, block_size); btrfs_folio_set_dirty(fs_info, folio, file_offset, block_size); out_unlock: if (!IS_ERR(folio)) { folio_unlock(folio); folio_put(folio); } if (ret) btrfs_delalloc_release_space(inode, data_reserved, file_offset, block_size, true); btrfs_delalloc_release_extents(inode, block_size); out: extent_changeset_free(data_reserved); return ret; } /* * Deal with cloning of inline extents. We try to copy the inline extent from * the source inode to destination inode when possible. When not possible we * copy the inline extent's data into the respective page of the inode. */ static int clone_copy_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_key *new_key, const u64 drop_start, const u64 datal, const u64 size, const u8 comp_type, char *inline_data, struct btrfs_trans_handle **trans_out) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; const u64 aligned_end = ALIGN(new_key->offset + datal, fs_info->sectorsize); struct btrfs_trans_handle *trans = NULL; struct btrfs_drop_extents_args drop_args = { 0 }; int ret; struct btrfs_key key; if (new_key->offset > 0) { ret = copy_inline_to_page(inode, new_key->offset, inline_data, size, datal, comp_type); goto out; } key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { return ret; } else if (ret > 0) { if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; else if (ret > 0) goto copy_inline_extent; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == btrfs_ino(inode) && key.type == BTRFS_EXTENT_DATA_KEY) { /* * There's an implicit hole at file offset 0, copy the * inline extent's data to the page. */ ASSERT(key.offset > 0); goto copy_to_page; } } else if (i_size_read(&inode->vfs_inode) <= datal) { struct btrfs_file_extent_item *ei; ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); /* * If it's an inline extent replace it with the source inline * extent, otherwise copy the source inline extent data into * the respective page at the destination inode. */ if (btrfs_file_extent_type(path->nodes[0], ei) == BTRFS_FILE_EXTENT_INLINE) goto copy_inline_extent; goto copy_to_page; } copy_inline_extent: /* * We have no extent items, or we have an extent at offset 0 which may * or may not be inlined. All these cases are dealt the same way. */ if (i_size_read(&inode->vfs_inode) > datal) { /* * At the destination offset 0 we have either a hole, a regular * extent or an inline extent larger then the one we want to * clone. Deal with all these cases by copying the inline extent * data into the respective page at the destination inode. */ goto copy_to_page; } /* * Release path before starting a new transaction so we don't hold locks * that would confuse lockdep. */ btrfs_release_path(path); /* * If we end up here it means were copy the inline extent into a leaf * of the destination inode. We know we will drop or adjust at most one * extent item in the destination root. * * 1 unit - adjusting old extent (we may have to split it) * 1 unit - add new extent * 1 unit - inode update */ trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; goto out; } drop_args.path = path; drop_args.start = drop_start; drop_args.end = aligned_end; drop_args.drop_cache = true; ret = btrfs_drop_extents(trans, root, inode, &drop_args); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_insert_empty_item(trans, root, path, new_key, size); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); goto out; } write_extent_buffer(path->nodes[0], inline_data, btrfs_item_ptr_offset(path->nodes[0], path->slots[0]), size); btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found); btrfs_set_inode_full_sync(inode); ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end); if (unlikely(ret)) btrfs_abort_transaction(trans, ret); out: if (!ret && !trans) { /* * No transaction here means we copied the inline extent into a * page of the destination inode. * * 1 unit to update inode item */ trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; } } if (ret && trans) btrfs_end_transaction(trans); if (!ret) *trans_out = trans; return ret; copy_to_page: /* * Release our path because we don't need it anymore and also because * copy_inline_to_page() needs to reserve data and metadata, which may * need to flush delalloc when we are low on available space and * therefore cause a deadlock if writeback of an inline extent needs to * write to the same leaf or an ordered extent completion needs to write * to the same leaf. */ btrfs_release_path(path); ret = copy_inline_to_page(inode, new_key->offset, inline_data, size, datal, comp_type); goto out; } /* * Clone a range from inode file to another. * * @src: Inode to clone from * @inode: Inode to clone to * @off: Offset within source to start clone from * @olen: Original length, passed by user, of range to clone * @olen_aligned: Block-aligned value of olen * @destoff: Offset within @inode to start clone * @no_time_update: Whether to update mtime/ctime on the target inode */ static int btrfs_clone(struct inode *src, struct inode *inode, const u64 off, const u64 olen, const u64 olen_aligned, const u64 destoff, bool no_time_update) { struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_trans_handle *trans; char *buf = NULL; struct btrfs_key key; u32 nritems; int slot; int ret; const u64 len = olen_aligned; u64 last_dest_end = destoff; u64 prev_extent_end = off; ret = -ENOMEM; buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); if (!buf) return ret; path = btrfs_alloc_path(); if (!path) { kvfree(buf); return ret; } path->reada = READA_FORWARD; /* Clone data */ key.objectid = btrfs_ino(BTRFS_I(src)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = off; while (1) { struct btrfs_file_extent_item *extent; u64 extent_gen; int type; u32 size; struct btrfs_key new_key; u64 disko = 0, diskl = 0; u64 datao = 0, datal = 0; u8 comp; u64 drop_start; /* Note the key will change type as we walk through the tree */ ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 0, 0); if (ret < 0) goto out; /* * First search, if no extent item that starts at offset off was * found but the previous item is an extent item, it's possible * it might overlap our target range, therefore process it. */ if (key.offset == off && ret > 0 && path->slots[0] > 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); if (key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } nritems = btrfs_header_nritems(path->nodes[0]); process_slot: if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) goto out; if (ret > 0) break; nritems = btrfs_header_nritems(path->nodes[0]); } leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.type > BTRFS_EXTENT_DATA_KEY || key.objectid != btrfs_ino(BTRFS_I(src))) break; ASSERT(key.type == BTRFS_EXTENT_DATA_KEY); extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_gen = btrfs_file_extent_generation(leaf, extent); comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { disko = btrfs_file_extent_disk_bytenr(leaf, extent); diskl = btrfs_file_extent_disk_num_bytes(leaf, extent); datao = btrfs_file_extent_offset(leaf, extent); datal = btrfs_file_extent_num_bytes(leaf, extent); } else if (type == BTRFS_FILE_EXTENT_INLINE) { /* Take upper bound, may be compressed */ datal = btrfs_file_extent_ram_bytes(leaf, extent); } /* * The first search might have left us at an extent item that * ends before our target range's start, can happen if we have * holes and NO_HOLES feature enabled. * * Subsequent searches may leave us on a file range we have * processed before - this happens due to a race with ordered * extent completion for a file range that is outside our source * range, but that range was part of a file extent item that * also covered a leading part of our source range. */ if (key.offset + datal <= prev_extent_end) { path->slots[0]++; goto process_slot; } else if (key.offset >= off + len) { break; } prev_extent_end = key.offset + datal; size = btrfs_item_size(leaf, slot); read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); btrfs_release_path(path); memcpy(&new_key, &key, sizeof(new_key)); new_key.objectid = btrfs_ino(BTRFS_I(inode)); if (off <= key.offset) new_key.offset = key.offset + destoff - off; else new_key.offset = destoff; /* * Deal with a hole that doesn't have an extent item that * represents it (NO_HOLES feature enabled). * This hole is either in the middle of the cloning range or at * the beginning (fully overlaps it or partially overlaps it). */ if (new_key.offset != last_dest_end) drop_start = last_dest_end; else drop_start = new_key.offset; if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { struct btrfs_replace_extent_info clone_info; /* * a | --- range to clone ---| b * | ------------- extent ------------- | */ /* Subtract range b */ if (key.offset + datal > off + len) datal = off + len - key.offset; /* Subtract range a */ if (off > key.offset) { datao += off - key.offset; datal -= off - key.offset; } clone_info.disk_offset = disko; clone_info.disk_len = diskl; clone_info.data_offset = datao; clone_info.data_len = datal; clone_info.file_offset = new_key.offset; clone_info.extent_buf = buf; clone_info.is_new_extent = false; clone_info.update_times = !no_time_update; ret = btrfs_replace_file_extents(BTRFS_I(inode), path, drop_start, new_key.offset + datal - 1, &clone_info, &trans); if (ret) goto out; } else { ASSERT(type == BTRFS_FILE_EXTENT_INLINE); /* * Inline extents always have to start at file offset 0 * and can never be bigger then the sector size. We can * never clone only parts of an inline extent, since all * reflink operations must start at a sector size aligned * offset, and the length must be aligned too or end at * the i_size (which implies the whole inlined data). */ ASSERT(key.offset == 0); ASSERT(datal <= fs_info->sectorsize); if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) || WARN_ON(key.offset != 0) || WARN_ON(datal > fs_info->sectorsize)) { ret = -EUCLEAN; goto out; } ret = clone_copy_inline_extent(BTRFS_I(inode), path, &new_key, drop_start, datal, size, comp, buf, &trans); if (ret) goto out; } btrfs_release_path(path); /* * Whenever we share an extent we update the last_reflink_trans * of each inode to the current transaction. This is needed to * make sure fsync does not log multiple checksum items with * overlapping ranges (because some extent items might refer * only to sections of the original extent). For the destination * inode we do this regardless of the generation of the extents * or even if they are inline extents or explicit holes, to make * sure a full fsync does not skip them. For the source inode, * we only need to update last_reflink_trans in case it's a new * extent that is not a hole or an inline extent, to deal with * the checksums problem on fsync. */ if (extent_gen == trans->transid && disko > 0) BTRFS_I(src)->last_reflink_trans = trans->transid; BTRFS_I(inode)->last_reflink_trans = trans->transid; last_dest_end = ALIGN(new_key.offset + datal, fs_info->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, destoff, olen, no_time_update); if (ret) goto out; if (new_key.offset + datal >= destoff + len) break; btrfs_release_path(path); key.offset = prev_extent_end; if (fatal_signal_pending(current)) { ret = -EINTR; goto out; } cond_resched(); } ret = 0; if (last_dest_end < destoff + len) { /* * We have an implicit hole that fully or partially overlaps our * cloning range at its end. This means that we either have the * NO_HOLES feature enabled or the implicit hole happened due to * mixing buffered and direct IO writes against this file. */ btrfs_release_path(path); /* * When using NO_HOLES and we are cloning a range that covers * only a hole (no extents) into a range beyond the current * i_size, punching a hole in the target range will not create * an extent map defining a hole, because the range starts at or * beyond current i_size. If the file previously had an i_size * greater than the new i_size set by this clone operation, we * need to make sure the next fsync is a full fsync, so that it * detects and logs a hole covering a range from the current * i_size to the new i_size. If the clone range covers extents, * besides a hole, then we know the full sync flag was already * set by previous calls to btrfs_replace_file_extents() that * replaced file extent items. */ if (last_dest_end >= i_size_read(inode)) btrfs_set_inode_full_sync(BTRFS_I(inode)); ret = btrfs_replace_file_extents(BTRFS_I(inode), path, last_dest_end, destoff + len - 1, NULL, &trans); if (ret) goto out; ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen, no_time_update); } out: kvfree(buf); clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); return ret; } static void btrfs_double_mmap_lock(struct btrfs_inode *inode1, struct btrfs_inode *inode2) { if (inode1 < inode2) swap(inode1, inode2); down_write(&inode1->i_mmap_lock); down_write_nested(&inode2->i_mmap_lock, SINGLE_DEPTH_NESTING); } static void btrfs_double_mmap_unlock(struct btrfs_inode *inode1, struct btrfs_inode *inode2) { up_write(&inode1->i_mmap_lock); up_write(&inode2->i_mmap_lock); } static int btrfs_extent_same_range(struct btrfs_inode *src, u64 loff, u64 len, struct btrfs_inode *dst, u64 dst_loff) { const u64 end = dst_loff + len - 1; struct extent_state *cached_state = NULL; struct btrfs_fs_info *fs_info = src->root->fs_info; const u64 bs = fs_info->sectorsize; int ret; /* * Lock destination range to serialize with concurrent readahead(), and * we are safe from concurrency with relocation of source extents * because we have already locked the inode's i_mmap_lock in exclusive * mode. */ btrfs_lock_extent(&dst->io_tree, dst_loff, end, &cached_state); ret = btrfs_clone(&src->vfs_inode, &dst->vfs_inode, loff, len, ALIGN(len, bs), dst_loff, 1); btrfs_unlock_extent(&dst->io_tree, dst_loff, end, &cached_state); btrfs_btree_balance_dirty(fs_info); return ret; } static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { int ret = 0; u64 i, tail_len, chunk_count; struct btrfs_root *root_dst = BTRFS_I(dst)->root; spin_lock(&root_dst->root_item_lock); if (root_dst->send_in_progress) { btrfs_warn_rl(root_dst->fs_info, "cannot deduplicate to root %llu while send operations are using it (%d in progress)", btrfs_root_id(root_dst), root_dst->send_in_progress); spin_unlock(&root_dst->root_item_lock); return -EAGAIN; } root_dst->dedupe_in_progress++; spin_unlock(&root_dst->root_item_lock); tail_len = olen % BTRFS_MAX_DEDUPE_LEN; chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); for (i = 0; i < chunk_count; i++) { ret = btrfs_extent_same_range(BTRFS_I(src), loff, BTRFS_MAX_DEDUPE_LEN, BTRFS_I(dst), dst_loff); if (ret) goto out; loff += BTRFS_MAX_DEDUPE_LEN; dst_loff += BTRFS_MAX_DEDUPE_LEN; } if (tail_len > 0) ret = btrfs_extent_same_range(BTRFS_I(src), loff, tail_len, BTRFS_I(dst), dst_loff); out: spin_lock(&root_dst->root_item_lock); root_dst->dedupe_in_progress--; spin_unlock(&root_dst->root_item_lock); return ret; } static noinline int btrfs_clone_files(struct file *file, struct file *file_src, u64 off, u64 olen, u64 destoff) { struct extent_state *cached_state = NULL; struct inode *inode = file_inode(file); struct inode *src = file_inode(file_src); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); int ret; int wb_ret; u64 len = olen; u64 bs = fs_info->sectorsize; u64 end; /* * VFS's generic_remap_file_range_prep() protects us from cloning the * eof block into the middle of a file, which would result in corruption * if the file size is not blocksize aligned. So we don't need to check * for that case here. */ if (off + len == src->i_size) len = ALIGN(src->i_size, bs) - off; if (destoff > inode->i_size) { const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); ret = btrfs_cont_expand(BTRFS_I(inode), inode->i_size, destoff); if (ret) return ret; /* * We may have truncated the last block if the inode's size is * not sector size aligned, so we need to wait for writeback to * complete before proceeding further, otherwise we can race * with cloning and attempt to increment a reference to an * extent that no longer exists (writeback completed right after * we found the previous extent covering eof and before we * attempted to increment its reference count). */ ret = btrfs_wait_ordered_range(BTRFS_I(inode), wb_start, destoff - wb_start); if (ret) return ret; } /* * Lock destination range to serialize with concurrent readahead(), and * we are safe from concurrency with relocation of source extents * because we have already locked the inode's i_mmap_lock in exclusive * mode. */ end = destoff + len - 1; btrfs_lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state); /* * We may have copied an inline extent into a page of the destination * range, so wait for writeback to complete before truncating pages * from the page cache. This is a rare case. */ wb_ret = btrfs_wait_ordered_range(BTRFS_I(inode), destoff, len); ret = ret ? ret : wb_ret; /* * Truncate page cache pages so that future reads will see the cloned * data immediately and not the previous data. */ truncate_inode_pages_range(&inode->i_data, round_down(destoff, PAGE_SIZE), round_up(destoff + len, PAGE_SIZE) - 1); btrfs_btree_balance_dirty(fs_info); return ret; } static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { struct btrfs_inode *inode_in = BTRFS_I(file_inode(file_in)); struct btrfs_inode *inode_out = BTRFS_I(file_inode(file_out)); u64 bs = inode_out->root->fs_info->sectorsize; u64 wb_len; int ret; if (!(remap_flags & REMAP_FILE_DEDUP)) { struct btrfs_root *root_out = inode_out->root; if (btrfs_root_readonly(root_out)) return -EROFS; ASSERT(inode_in->vfs_inode.i_sb == inode_out->vfs_inode.i_sb); } /* Don't make the dst file partly checksummed */ if ((inode_in->flags & BTRFS_INODE_NODATASUM) != (inode_out->flags & BTRFS_INODE_NODATASUM)) { return -EINVAL; } /* * Now that the inodes are locked, we need to start writeback ourselves * and can not rely on the writeback from the VFS's generic helper * generic_remap_file_range_prep() because: * * 1) For compression we must call filemap_fdatawrite_range() range * twice (btrfs_fdatawrite_range() does it for us), and the generic * helper only calls it once; * * 2) filemap_fdatawrite_range(), called by the generic helper only * waits for the writeback to complete, i.e. for IO to be done, and * not for the ordered extents to complete. We need to wait for them * to complete so that new file extent items are in the fs tree. */ if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP)) wb_len = ALIGN(inode_in->vfs_inode.i_size, bs) - ALIGN_DOWN(pos_in, bs); else wb_len = ALIGN(*len, bs); /* * Workaround to make sure NOCOW buffered write reach disk as NOCOW. * * Btrfs' back references do not have a block level granularity, they * work at the whole extent level. * NOCOW buffered write without data space reserved may not be able * to fall back to CoW due to lack of data space, thus could cause * data loss. * * Here we take a shortcut by flushing the whole inode, so that all * nocow write should reach disk as nocow before we increase the * reference of the extent. We could do better by only flushing NOCOW * data, but that needs extra accounting. * * Also we don't need to check ASYNC_EXTENT, as async extent will be * CoWed anyway, not affecting nocow part. */ ret = filemap_flush(inode_in->vfs_inode.i_mapping); if (ret < 0) return ret; ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), wb_len); if (ret < 0) return ret; ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), wb_len); if (ret < 0) return ret; return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags); } static bool file_sync_write(const struct file *file) { if (file->f_flags & (__O_SYNC | O_DSYNC)) return true; if (IS_SYNC(file_inode(file))) return true; return false; } loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) { struct btrfs_inode *src_inode = BTRFS_I(file_inode(src_file)); struct btrfs_inode *dst_inode = BTRFS_I(file_inode(dst_file)); bool same_inode = dst_inode == src_inode; int ret; if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; if (same_inode) { btrfs_inode_lock(src_inode, BTRFS_ILOCK_MMAP); } else { lock_two_nondirectories(&src_inode->vfs_inode, &dst_inode->vfs_inode); btrfs_double_mmap_lock(src_inode, dst_inode); } ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff, &len, remap_flags); if (ret < 0 || len == 0) goto out_unlock; if (remap_flags & REMAP_FILE_DEDUP) ret = btrfs_extent_same(&src_inode->vfs_inode, off, len, &dst_inode->vfs_inode, destoff); else ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); out_unlock: if (same_inode) { btrfs_inode_unlock(src_inode, BTRFS_ILOCK_MMAP); } else { btrfs_double_mmap_unlock(src_inode, dst_inode); unlock_two_nondirectories(&src_inode->vfs_inode, &dst_inode->vfs_inode); } /* * If either the source or the destination file was opened with O_SYNC, * O_DSYNC or has the S_SYNC attribute, fsync both the destination and * source files/ranges, so that after a successful return (0) followed * by a power failure results in the reflinked data to be readable from * both files/ranges. */ if (ret == 0 && len > 0 && (file_sync_write(src_file) || file_sync_write(dst_file))) { ret = btrfs_sync_file(src_file, off, off + len - 1, 0); if (ret == 0) ret = btrfs_sync_file(dst_file, destoff, destoff + len - 1, 0); } return ret < 0 ? ret : len; } |
| 2 2 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Digianswer Bluetooth USB driver * * Copyright (C) 2004-2007 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/usb.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "hci_uart.h" #define VERSION "0.11" static const struct usb_device_id bpa10x_table[] = { /* Tektronix BPA 100/105 (Digianswer) */ { USB_DEVICE(0x08fd, 0x0002) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, bpa10x_table); struct bpa10x_data { struct hci_dev *hdev; struct usb_device *udev; struct usb_anchor tx_anchor; struct usb_anchor rx_anchor; struct sk_buff *rx_skb[2]; }; static void bpa10x_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct hci_dev *hdev = (struct hci_dev *) skb->dev; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) goto done; if (!urb->status) hdev->stat.byte_tx += urb->transfer_buffer_length; else hdev->stat.err_tx++; done: kfree(urb->setup_packet); kfree_skb(skb); } #define HCI_VENDOR_HDR_SIZE 5 #define HCI_RECV_VENDOR \ .type = HCI_VENDOR_PKT, \ .hlen = HCI_VENDOR_HDR_SIZE, \ .loff = 3, \ .lsize = 2, \ .maxlen = HCI_MAX_FRAME_SIZE static const struct h4_recv_pkt bpa10x_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { HCI_RECV_VENDOR, .recv = hci_recv_diag }, }; static void bpa10x_rx_complete(struct urb *urb) { struct hci_dev *hdev = urb->context; struct bpa10x_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) return; if (urb->status == 0) { bool idx = usb_pipebulk(urb->pipe); data->rx_skb[idx] = h4_recv_buf(hdev, data->rx_skb[idx], urb->transfer_buffer, urb->actual_length, bpa10x_recv_pkts, ARRAY_SIZE(bpa10x_recv_pkts)); if (IS_ERR(data->rx_skb[idx])) { bt_dev_err(hdev, "corrupted event packet"); hdev->stat.err_rx++; data->rx_skb[idx] = NULL; } } usb_anchor_urb(urb, &data->rx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } static inline int bpa10x_submit_intr_urb(struct hci_dev *hdev) { struct bpa10x_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size = 16; BT_DBG("%s", hdev->name); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; buf = kmalloc(size, GFP_KERNEL); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvintpipe(data->udev, 0x81); usb_fill_int_urb(urb, data->udev, pipe, buf, size, bpa10x_rx_complete, hdev, 1); urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &data->rx_anchor); err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static inline int bpa10x_submit_bulk_urb(struct hci_dev *hdev) { struct bpa10x_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size = 64; BT_DBG("%s", hdev->name); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; buf = kmalloc(size, GFP_KERNEL); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvbulkpipe(data->udev, 0x82); usb_fill_bulk_urb(urb, data->udev, pipe, buf, size, bpa10x_rx_complete, hdev); urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &data->rx_anchor); err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static int bpa10x_open(struct hci_dev *hdev) { struct bpa10x_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s", hdev->name); err = bpa10x_submit_intr_urb(hdev); if (err < 0) goto error; err = bpa10x_submit_bulk_urb(hdev); if (err < 0) goto error; return 0; error: usb_kill_anchored_urbs(&data->rx_anchor); return err; } static int bpa10x_close(struct hci_dev *hdev) { struct bpa10x_data *data = hci_get_drvdata(hdev); BT_DBG("%s", hdev->name); usb_kill_anchored_urbs(&data->rx_anchor); return 0; } static int bpa10x_flush(struct hci_dev *hdev) { struct bpa10x_data *data = hci_get_drvdata(hdev); BT_DBG("%s", hdev->name); usb_kill_anchored_urbs(&data->tx_anchor); return 0; } static int bpa10x_setup(struct hci_dev *hdev) { static const u8 req[] = { 0x07 }; struct sk_buff *skb; BT_DBG("%s", hdev->name); /* Read revision string */ skb = __hci_cmd_sync(hdev, 0xfc0e, sizeof(req), req, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) return PTR_ERR(skb); bt_dev_info(hdev, "%s", (char *)(skb->data + 1)); hci_set_fw_info(hdev, "%s", skb->data + 1); kfree_skb(skb); return 0; } static int bpa10x_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct bpa10x_data *data = hci_get_drvdata(hdev); struct usb_ctrlrequest *dr; struct urb *urb; unsigned int pipe; int err; BT_DBG("%s", hdev->name); skb->dev = (void *) hdev; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; /* Prepend skb with frame type */ *(u8 *)skb_push(skb, 1) = hci_skb_pkt_type(skb); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: dr = kmalloc(sizeof(*dr), GFP_KERNEL); if (!dr) { usb_free_urb(urb); return -ENOMEM; } dr->bRequestType = USB_TYPE_VENDOR; dr->bRequest = 0; dr->wIndex = 0; dr->wValue = 0; dr->wLength = __cpu_to_le16(skb->len); pipe = usb_sndctrlpipe(data->udev, 0x00); usb_fill_control_urb(urb, data->udev, pipe, (void *) dr, skb->data, skb->len, bpa10x_tx_complete, skb); hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: pipe = usb_sndbulkpipe(data->udev, 0x02); usb_fill_bulk_urb(urb, data->udev, pipe, skb->data, skb->len, bpa10x_tx_complete, skb); hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: pipe = usb_sndbulkpipe(data->udev, 0x02); usb_fill_bulk_urb(urb, data->udev, pipe, skb->data, skb->len, bpa10x_tx_complete, skb); hdev->stat.sco_tx++; break; default: usb_free_urb(urb); return -EILSEQ; } usb_anchor_urb(urb, &data->tx_anchor); err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { bt_dev_err(hdev, "urb %p submission failed", urb); kfree(urb->setup_packet); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static int bpa10x_set_diag(struct hci_dev *hdev, bool enable) { const u8 req[] = { 0x00, enable }; struct sk_buff *skb; BT_DBG("%s", hdev->name); if (!test_bit(HCI_RUNNING, &hdev->flags)) return -ENETDOWN; /* Enable sniffer operation */ skb = __hci_cmd_sync(hdev, 0xfc0e, sizeof(req), req, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) return PTR_ERR(skb); kfree_skb(skb); return 0; } static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct bpa10x_data *data; struct hci_dev *hdev; int err; BT_DBG("intf %p id %p", intf, id); if (intf->cur_altsetting->desc.bInterfaceNumber != 0) return -ENODEV; data = devm_kzalloc(&intf->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->udev = interface_to_usbdev(intf); init_usb_anchor(&data->tx_anchor); init_usb_anchor(&data->rx_anchor); hdev = hci_alloc_dev(); if (!hdev) return -ENOMEM; hdev->bus = HCI_USB; hci_set_drvdata(hdev, data); data->hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); hdev->open = bpa10x_open; hdev->close = bpa10x_close; hdev->flush = bpa10x_flush; hdev->setup = bpa10x_setup; hdev->send = bpa10x_send_frame; hdev->set_diag = bpa10x_set_diag; hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); err = hci_register_dev(hdev); if (err < 0) { hci_free_dev(hdev); return err; } usb_set_intfdata(intf, data); return 0; } static void bpa10x_disconnect(struct usb_interface *intf) { struct bpa10x_data *data = usb_get_intfdata(intf); BT_DBG("intf %p", intf); if (!data) return; usb_set_intfdata(intf, NULL); hci_unregister_dev(data->hdev); hci_free_dev(data->hdev); kfree_skb(data->rx_skb[0]); kfree_skb(data->rx_skb[1]); } static struct usb_driver bpa10x_driver = { .name = "bpa10x", .probe = bpa10x_probe, .disconnect = bpa10x_disconnect, .id_table = bpa10x_table, .disable_hub_initiated_lpm = 1, }; module_usb_driver(bpa10x_driver); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Digianswer Bluetooth USB driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); |
| 44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Facebook. All rights reserved. */ #ifndef BTRFS_QGROUP_H #define BTRFS_QGROUP_H #include <linux/types.h> #include <linux/spinlock.h> #include <linux/rbtree.h> #include <linux/kobject.h> #include <linux/list.h> #include <uapi/linux/btrfs_tree.h> struct extent_buffer; struct extent_changeset; struct btrfs_delayed_extent_op; struct btrfs_fs_info; struct btrfs_root; struct btrfs_ioctl_quota_ctl_args; struct btrfs_trans_handle; struct btrfs_delayed_ref_root; struct btrfs_inode; struct btrfs_transaction; struct btrfs_block_group; struct btrfs_qgroup_swapped_blocks; /* * Btrfs qgroup overview * * Btrfs qgroup splits into 3 main part: * 1) Reserve * Reserve metadata/data space for incoming operations * Affect how qgroup limit works * * 2) Trace * Tell btrfs qgroup to trace dirty extents. * * Dirty extents including: * - Newly allocated extents * - Extents going to be deleted (in this trans) * - Extents whose owner is going to be modified * * This is the main part affects whether qgroup numbers will stay * consistent. * Btrfs qgroup can trace clean extents and won't cause any problem, * but it will consume extra CPU time, it should be avoided if possible. * * 3) Account * Btrfs qgroup will updates its numbers, based on dirty extents traced * in previous step. * * Normally at qgroup rescan and transaction commit time. */ /* * Special performance optimization for balance. * * For balance, we need to swap subtree of subvolume and reloc trees. * In theory, we need to trace all subtree blocks of both subvolume and reloc * trees, since their owner has changed during such swap. * * However since balance has ensured that both subtrees are containing the * same contents and have the same tree structures, such swap won't cause * qgroup number change. * * But there is a race window between subtree swap and transaction commit, * during that window, if we increase/decrease tree level or merge/split tree * blocks, we still need to trace the original subtrees. * * So for balance, we use a delayed subtree tracing, whose workflow is: * * 1) Record the subtree root block get swapped. * * During subtree swap: * O = Old tree blocks * N = New tree blocks * reloc tree subvolume tree X * Root Root * / \ / \ * NA OB OA OB * / | | \ / | | \ * NC ND OE OF OC OD OE OF * * In this case, NA and OA are going to be swapped, record (NA, OA) into * subvolume tree X. * * 2) After subtree swap. * reloc tree subvolume tree X * Root Root * / \ / \ * OA OB NA OB * / | | \ / | | \ * OC OD OE OF NC ND OE OF * * 3a) COW happens for OB * If we are going to COW tree block OB, we check OB's bytenr against * tree X's swapped_blocks structure. * If it doesn't fit any, nothing will happen. * * 3b) COW happens for NA * Check NA's bytenr against tree X's swapped_blocks, and get a hit. * Then we do subtree scan on both subtrees OA and NA. * Resulting 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND). * * Then no matter what we do to subvolume tree X, qgroup numbers will * still be correct. * Then NA's record gets removed from X's swapped_blocks. * * 4) Transaction commit * Any record in X's swapped_blocks gets removed, since there is no * modification to the swapped subtrees, no need to trigger heavy qgroup * subtree rescan for them. */ /* * These flags share the flags field of the btrfs_qgroup_status_item with the * persisted flags defined in btrfs_tree.h. * * To minimize the chance of collision with new persisted status flags, these * count backwards from the MSB. */ #define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63) #define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62) #define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3) /* * Record a dirty extent, and info qgroup to update quota on it */ struct btrfs_qgroup_extent_record { /* * The bytenr of the extent is given by its index in the dirty_extents * xarray of struct btrfs_delayed_ref_root left shifted by * fs_info->sectorsize_bits. */ u64 num_bytes; /* * For qgroup reserved data space freeing. * * @data_rsv_refroot and @data_rsv will be recorded after * BTRFS_ADD_DELAYED_EXTENT is called. * And will be used to free reserved qgroup space at * transaction commit time. */ u32 data_rsv; /* reserved data space needs to be freed */ u64 data_rsv_refroot; /* which root the reserved data belongs to */ struct ulist *old_roots; }; struct btrfs_qgroup_swapped_block { struct rb_node node; int level; bool trace_leaf; /* bytenr/generation of the tree block in subvolume tree after swap */ u64 subvol_bytenr; u64 subvol_generation; /* bytenr/generation of the tree block in reloc tree after swap */ u64 reloc_bytenr; u64 reloc_generation; u64 last_snapshot; struct btrfs_key first_key; }; /* * Qgroup reservation types: * * DATA: * space reserved for data * * META_PERTRANS: * Space reserved for metadata (per-transaction) * Due to the fact that qgroup data is only updated at transaction commit * time, reserved space for metadata must be kept until transaction * commits. * Any metadata reserved that are used in btrfs_start_transaction() should * be of this type. * * META_PREALLOC: * There are cases where metadata space is reserved before starting * transaction, and then btrfs_join_transaction() to get a trans handle. * Any metadata reserved for such usage should be of this type. * And after join_transaction() part (or all) of such reservation should * be converted into META_PERTRANS. */ enum btrfs_qgroup_rsv_type { BTRFS_QGROUP_RSV_DATA, BTRFS_QGROUP_RSV_META_PERTRANS, BTRFS_QGROUP_RSV_META_PREALLOC, BTRFS_QGROUP_RSV_LAST, }; /* * Represents how many bytes we have reserved for this qgroup. * * Each type should have different reservation behavior. * E.g, data follows its io_tree flag modification, while * *currently* meta is just reserve-and-clear during transaction. * * TODO: Add new type for reservation which can survive transaction commit. * Current metadata reservation behavior is not suitable for such case. */ struct btrfs_qgroup_rsv { u64 values[BTRFS_QGROUP_RSV_LAST]; }; /* * one struct for each qgroup, organized in fs_info->qgroup_tree. */ struct btrfs_qgroup { u64 qgroupid; /* * state */ u64 rfer; /* referenced */ u64 rfer_cmpr; /* referenced compressed */ u64 excl; /* exclusive */ u64 excl_cmpr; /* exclusive compressed */ /* * limits */ u64 lim_flags; /* which limits are set */ u64 max_rfer; u64 max_excl; u64 rsv_rfer; u64 rsv_excl; /* * reservation tracking */ struct btrfs_qgroup_rsv rsv; /* * lists */ struct list_head groups; /* groups this group is member of */ struct list_head members; /* groups that are members of this group */ struct list_head dirty; /* dirty groups */ /* * For qgroup iteration usage. * * The iteration list should always be empty until qgroup_iterator_add() * is called. And should be reset to empty after the iteration is * finished. */ struct list_head iterator; /* * For nested iterator usage. * * Here we support at most one level of nested iterator calls like: * * LIST_HEAD(all_qgroups); * { * LIST_HEAD(local_qgroups); * qgroup_iterator_add(local_qgroups, qg); * qgroup_iterator_nested_add(all_qgroups, qg); * do_some_work(local_qgroups); * qgroup_iterator_clean(local_qgroups); * } * do_some_work(all_qgroups); * qgroup_iterator_nested_clean(all_qgroups); */ struct list_head nested_iterator; struct rb_node node; /* tree of qgroups */ /* * temp variables for accounting operations * Refer to qgroup_shared_accounting() for details. */ u64 old_refcnt; u64 new_refcnt; /* * Sysfs kobjectid */ struct kobject kobj; }; /* Glue structure to represent the relations between qgroups. */ struct btrfs_qgroup_list { struct list_head next_group; struct list_head next_member; struct btrfs_qgroup *group; struct btrfs_qgroup *member; }; struct btrfs_squota_delta { /* The fstree root this delta counts against. */ u64 root; /* The number of bytes in the extent being counted. */ u64 num_bytes; /* The generation the extent was created in. */ u64 generation; /* Whether we are using or freeing the extent. */ bool is_inc; /* Whether the extent is data or metadata. */ bool is_data; }; static inline u64 btrfs_qgroup_subvolid(u64 qgroupid) { return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1)); } /* * For qgroup event trace points only */ enum { ENUM_BIT(QGROUP_RESERVE), ENUM_BIT(QGROUP_RELEASE), ENUM_BIT(QGROUP_FREE), }; enum btrfs_qgroup_mode { BTRFS_QGROUP_MODE_DISABLED, BTRFS_QGROUP_MODE_FULL, BTRFS_QGROUP_MODE_SIMPLE }; enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info); bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info); bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info); int btrfs_quota_enable(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_quota_ctl_args *quota_ctl_args); int btrfs_quota_disable(struct btrfs_fs_info *fs_info); int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, bool interruptible); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst, struct btrfs_qgroup_list *prealloc); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst); int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid); int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, struct btrfs_qgroup_limit *limit); int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); int btrfs_qgroup_trace_extent_nolock( struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_qgroup_extent_record *record, u64 bytenr); int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr); int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes); int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, struct extent_buffer *eb); int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, struct extent_buffer *root_eb, u64 root_gen, int root_level); int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct ulist *old_roots, struct ulist *new_roots); int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); int btrfs_run_qgroups(struct btrfs_trans_handle *trans); int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_inherit *inherit, size_t size); int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, u64 objectid, u64 inode_rootid, struct btrfs_qgroup_inherit *inherit); void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes, enum btrfs_qgroup_rsv_type type); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid, u64 rfer, u64 excl); #endif /* New io_tree based accurate qgroup reserve API */ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len); int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, u64 *freed); int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce, bool noflush); /* Reserve metadata space for pertrans and prealloc type */ static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root, int num_bytes, bool enforce) { return __btrfs_qgroup_reserve_meta(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS, enforce, false); } static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root, int num_bytes, bool enforce, bool noflush) { return __btrfs_qgroup_reserve_meta(root, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC, enforce, noflush); } void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type); /* Free per-transaction meta reservation for error handling */ static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root, int num_bytes) { __btrfs_qgroup_free_meta(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); } /* Pre-allocated meta reservation can be freed at need */ static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root, int num_bytes) { __btrfs_qgroup_free_meta(root, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC); } void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root); void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes); void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode); /* btrfs_qgroup_swapped_blocks related functions */ void btrfs_qgroup_init_swapped_blocks( struct btrfs_qgroup_swapped_blocks *swapped_blocks); void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root); int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root, struct btrfs_block_group *bg, struct extent_buffer *subvol_parent, int subvol_slot, struct extent_buffer *reloc_parent, int reloc_slot, u64 last_snapshot); int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb); void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info); int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, const struct btrfs_squota_delta *delta); #endif |
| 1 843 1 1458 1458 1460 1459 1463 1463 1460 1 1457 1102 875 1463 4 1460 1463 840 840 1 841 1 842 843 842 841 843 843 1462 843 842 843 1458 1461 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 | // SPDX-License-Identifier: GPL-2.0 /* * FPU signal frame handling routines. */ #include <linux/compat.h> #include <linux/cpu.h> #include <linux/pagemap.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> #include <asm/sigframe.h> #include <asm/trapnr.h> #include <asm/trace/fpu.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, struct _fpx_sw_bytes *fx_sw) { void __user *fpstate = fxbuf; unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) return false; /* Check for the first magic field */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1) goto setfx; /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size))) return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) return true; setfx: trace_x86_fpu_xstate_check_failed(x86_task_fpu(current)); /* Set the parameters for fx only state */ fx_sw->magic1 = 0; fx_sw->xstate_size = sizeof(struct fxregs_state); fx_sw->xfeatures = XFEATURE_MASK_FPSSE; return true; } /* * Signal frame handlers. */ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env)) || __put_user(xsave->i387.swd, &fp->status) || __put_user(X86_FXSR_MAGIC, &fp->magic)) return false; } else { struct fregs_state __user *fp = buf; u32 swd; if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return false; } return true; } /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout * pointed to by the fpstate pointer in the sigcontext. * This is saved when ever the FP and extended state context is * saved on the user stack during the signal handler delivery to the user. */ static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame, struct fpstate *fpstate) { sw_bytes->magic1 = FP_XSTATE_MAGIC1; sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE; sw_bytes->xfeatures = fpstate->user_xfeatures; sw_bytes->xstate_size = fpstate->user_size; if (ia32_frame) sw_bytes->extended_size += sizeof(struct fregs_state); } static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; struct _fpx_sw_bytes sw_bytes = {}; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ save_sw_bytes(&sw_bytes, ia32_frame, fpstate); err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes)); if (!use_xsave()) return !err; err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + fpstate->user_size)); /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to * the FP/SSE bits by the legacy applications which don't touch * xfeatures in the xsave header. * * xsave aware apps can change the xfeatures in the xsave * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE); return !err; } static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) { if (use_xsave()) return xsave_to_user_sigframe(buf, pkru); if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else return fnsave_to_user_sigframe((struct fregs_state __user *) buf); } /* * Save the fpu, extended register state to the user signal frame. * * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save * state is copied. * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. * * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * * Save it directly to the user frame with disabled page fault handler. If * that faults, try to clear the frame which handles the page fault. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. * * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru) { struct task_struct *tsk = current; struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate; bool ia32_fxstate = (buf != buf_fx); int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); if (!static_cpu_has(X86_FEATURE_FPU)) { struct user_i387_ia32_struct fp; fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, .left = sizeof(fp)}); return !copy_to_user(buf, &fp, sizeof(fp)); } if (!access_ok(buf, size)) return false; if (use_xsave()) { struct xregs_state __user *xbuf = buf_fx; /* * Clear the xsave header first, so that reserved fields are * initialized to zero. */ if (__clear_user(&xbuf->header, sizeof(xbuf->header))) return false; } retry: /* * Load the FPU registers if they are not valid for the current task. * With a valid FPU state we can attempt to save the state directly to * userland's stack frame which will likely succeed. If it does not, * resolve the fault in the user memory and try again. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx, pkru); pagefault_enable(); fpregs_unlock(); if (ret) { if (!__clear_user(buf_fx, fpstate->user_size)) goto retry; return false; } /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate)) return false; return true; } static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, u64 xrestore, bool fx_only) { if (use_xsave()) { u64 init_bv = ufeatures & ~xrestore; int ret; if (likely(!fx_only)) ret = xrstor_from_user_sigframe(buf, xrestore); else ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) os_xrstor(&init_fpstate, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); } else { return frstor_from_user_sigframe(buf); } } /* * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = x86_task_fpu(current); int ret; /* Restore enabled features only. */ xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ xfd_update_state(fpu->fpstate); pagefault_disable(); ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, xrestore, fx_only); pagefault_enable(); if (unlikely(ret)) { /* * The above did an FPU restore operation, restricted to * the user portion of the registers, and failed, but the * microcode might have modified the FPU registers * nevertheless. * * If the FPU registers do not belong to current, then * invalidate the FPU register state otherwise the task * might preempt current and return to user space with * corrupted FPU registers. */ if (test_thread_flag(TIF_NEED_FPU_LOAD)) __cpu_invalidate_fpregs_state(); fpregs_unlock(); /* Try to handle #PF, but anything else is fatal. */ if (ret != X86_TRAP_PF) return false; if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } /* * Restore supervisor states: previous context switch etc has done * XSAVES and saved the supervisor states in the kernel buffer from * which they can be restored now. * * It would be optimal to handle this with a single XRSTORS, but * this does not work because the rest of the FPU registers have * been restored from a user buffer directly. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) os_xrstor_supervisor(fpu->fpstate); fpregs_mark_activate(); fpregs_unlock(); return true; } static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { struct task_struct *tsk = current; struct fpu *fpu = x86_task_fpu(tsk); struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; u64 user_xfeatures = 0; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user)) return false; fx_only = !fx_sw_user.magic1; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* * Copy the legacy state because the FP portion of the FX frame has * to be ignored for histerical raisins. The legacy state is folded * in once the larger state has been copied. */ if (__copy_from_user(&env, buf, sizeof(env))) return false; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is * not modified on context switch and that the xstate is considered * to be loaded again on return to userland (overriding last_cpu avoids * the optimisation). */ fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { /* * If supervisor states are available then save the * hardware state in current's fpstate so that the * supervisor state is preserved. Save the full state for * simplicity. There is no point in optimizing this by only * saving the supervisor states and then shuffle them to * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) os_xsave(fpu->fpstate); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); fpregs_unlock(); fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { if (copy_sigframe_from_user_to_xstate(tsk, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, sizeof(fpregs->fxsave))) return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. */ if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask) return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). */ fpregs->fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; } /* Fold the legacy FP storage */ convert_to_fxsr(&fpregs->fxsave, &env); fpregs_lock(); if (use_xsave()) { /* * Remove all UABI feature bits not set in user_xfeatures * from the memory xstate header which makes the full * restore below bring them into init state. This works for * fx_only mode as well because that has only FP and SSE * set in user_xfeatures. * * Preserve supervisor states! */ u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpregs->xsave.header.xfeatures &= mask; success = !os_xrstor_safe(fpu->fpstate, fpu_kernel_cfg.max_features); } else { success = !fxrstor_safe(&fpregs->fxsave); } if (likely(success)) fpregs_mark_activate(); fpregs_unlock(); return success; } static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) { unsigned int size = fpstate->user_size; return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size; } /* * Restore FPU state from a sigframe: */ bool fpu__restore_sig(void __user *buf, int ia32_frame) { struct fpu *fpu = x86_task_fpu(current); void __user *buf_fx = buf; bool ia32_fxstate = false; bool success = false; unsigned int size; if (unlikely(!buf)) { fpu__clear_user_states(fpu); return true; } size = xstate_sigframe_size(fpu->fpstate); ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); /* * Only FXSR enabled systems need the FX state quirk. * FRSTOR does not need it and can use the fast path. */ if (ia32_frame && use_fxsr()) { buf_fx = buf + sizeof(struct fregs_state); size += sizeof(struct fregs_state); ia32_fxstate = true; } if (!access_ok(buf, size)) goto out; if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { success = !fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf); } else { success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: if (unlikely(!success)) fpu__clear_user_states(fpu); return success; } unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { frame_size += sizeof(struct fregs_state); sp -= sizeof(struct fregs_state); } *size = frame_size; return sp; } unsigned long __init fpu__get_fpstate_size(void) { unsigned long ret = fpu_user_cfg.max_size; if (use_xsave()) ret += FP_XSTATE_MAGIC2_SIZE; /* * This space is needed on (most) 32-bit kernels, or when a 32-bit * app is running on a 64-bit kernel. To keep things simple, just * assume the worst case and always include space for 'freg_state', * even for 64-bit apps on 64-bit kernels. This wastes a bit of * space, but keeps the code simple. */ if ((IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) && use_fxsr()) ret += sizeof(struct fregs_state); return ret; } |
| 75 759 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Task I/O accounting operations */ #ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED #define __TASK_IO_ACCOUNTING_OPS_INCLUDED #include <linux/sched.h> #ifdef CONFIG_TASK_IO_ACCOUNTING static inline void task_io_account_read(size_t bytes) { current->ioac.read_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return p->ioac.read_bytes >> 9; } static inline void task_io_account_write(size_t bytes) { current->ioac.write_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return p->ioac.write_bytes >> 9; } static inline void task_io_account_cancelled_write(size_t bytes) { current->ioac.cancelled_write_bytes += bytes; } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { memset(ioac, 0, sizeof(*ioac)); } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->read_bytes += src->read_bytes; dst->write_bytes += src->write_bytes; dst->cancelled_write_bytes += src->cancelled_write_bytes; } #else static inline void task_io_account_read(size_t bytes) { } static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return 0; } static inline void task_io_account_write(size_t bytes) { } static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return 0; } static inline void task_io_account_cancelled_write(size_t bytes) { } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_TASK_XACCT static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->rchar += src->rchar; dst->wchar += src->wchar; dst->syscr += src->syscr; dst->syscw += src->syscw; } #else static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_XACCT */ static inline void task_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { task_chr_io_accounting_add(dst, src); task_blk_io_accounting_add(dst, src); } #endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ |
| 1 1 2 2 1 2 2 2 1 2 2 2 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 | // SPDX-License-Identifier: GPL-2.0 /* * Fintek F81232 USB to serial adaptor driver * Fintek F81532A/534A/535/536 USB to 2/4/8/12 serial adaptor driver * * Copyright (C) 2012 Greg Kroah-Hartman (gregkh@linuxfoundation.org) * Copyright (C) 2012 Linux Foundation */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial_reg.h> #define F81232_ID \ { USB_DEVICE(0x1934, 0x0706) } /* 1 port UART device */ #define F81534A_SERIES_ID \ { USB_DEVICE(0x2c42, 0x1602) }, /* In-Box 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1604) }, /* In-Box 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1605) }, /* In-Box 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1606) }, /* In-Box 12 port UART device */ \ { USB_DEVICE(0x2c42, 0x1608) }, /* Non-Flash type */ \ { USB_DEVICE(0x2c42, 0x1632) }, /* 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1634) }, /* 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1635) }, /* 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1636) } /* 12 port UART device */ #define F81534A_CTRL_ID \ { USB_DEVICE(0x2c42, 0x16f8) } /* Global control device */ static const struct usb_device_id f81232_id_table[] = { F81232_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_id_table[] = { F81534A_SERIES_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_ctrl_id_table[] = { F81534A_CTRL_ID, { } /* Terminating entry */ }; static const struct usb_device_id combined_id_table[] = { F81232_ID, F81534A_SERIES_ID, F81534A_CTRL_ID, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, combined_id_table); /* Maximum baudrate for F81232 */ #define F81232_MAX_BAUDRATE 1500000 #define F81232_DEF_BAUDRATE 9600 /* USB Control EP parameter */ #define F81232_REGISTER_REQUEST 0xa0 #define F81232_GET_REGISTER 0xc0 #define F81232_SET_REGISTER 0x40 #define F81534A_ACCESS_REG_RETRY 2 #define SERIAL_BASE_ADDRESS 0x0120 #define RECEIVE_BUFFER_REGISTER (0x00 + SERIAL_BASE_ADDRESS) #define INTERRUPT_ENABLE_REGISTER (0x01 + SERIAL_BASE_ADDRESS) #define FIFO_CONTROL_REGISTER (0x02 + SERIAL_BASE_ADDRESS) #define LINE_CONTROL_REGISTER (0x03 + SERIAL_BASE_ADDRESS) #define MODEM_CONTROL_REGISTER (0x04 + SERIAL_BASE_ADDRESS) #define LINE_STATUS_REGISTER (0x05 + SERIAL_BASE_ADDRESS) #define MODEM_STATUS_REGISTER (0x06 + SERIAL_BASE_ADDRESS) /* * F81232 Clock registers (106h) * * Bit1-0: Clock source selector * 00: 1.846MHz. * 01: 18.46MHz. * 10: 24MHz. * 11: 14.77MHz. */ #define F81232_CLK_REGISTER 0x106 #define F81232_CLK_1_846_MHZ 0 #define F81232_CLK_18_46_MHZ BIT(0) #define F81232_CLK_24_MHZ BIT(1) #define F81232_CLK_14_77_MHZ (BIT(1) | BIT(0)) #define F81232_CLK_MASK GENMASK(1, 0) #define F81534A_MODE_REG 0x107 #define F81534A_TRIGGER_MASK GENMASK(3, 2) #define F81534A_TRIGGER_MULTIPLE_4X BIT(3) #define F81534A_FIFO_128BYTE (BIT(1) | BIT(0)) /* Serial port self GPIO control, 2bytes [control&output data][input data] */ #define F81534A_GPIO_REG 0x10e #define F81534A_GPIO_MODE2_DIR BIT(6) /* 1: input, 0: output */ #define F81534A_GPIO_MODE1_DIR BIT(5) #define F81534A_GPIO_MODE0_DIR BIT(4) #define F81534A_GPIO_MODE2_OUTPUT BIT(2) #define F81534A_GPIO_MODE1_OUTPUT BIT(1) #define F81534A_GPIO_MODE0_OUTPUT BIT(0) #define F81534A_CTRL_CMD_ENABLE_PORT 0x116 struct f81232_private { struct mutex lock; u8 modem_control; u8 modem_status; u8 shadow_lcr; speed_t baud_base; struct work_struct lsr_work; struct work_struct interrupt_work; struct usb_serial_port *port; }; static u32 const baudrate_table[] = { 115200, 921600, 1152000, 1500000 }; static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, F81232_CLK_18_46_MHZ, F81232_CLK_24_MHZ }; static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) { return DIV_ROUND_CLOSEST(clockrate, baudrate); } static int f81232_get_register(struct usb_serial_port *port, u16 reg, u8 *val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_recv(dev, 0, F81232_REGISTER_REQUEST, F81232_GET_REGISTER, reg, 0, val, sizeof(*val), USB_CTRL_GET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_register(struct usb_serial_port *port, u16 reg, u8 val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, &val, sizeof(val), USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_mask_register(struct usb_serial_port *port, u16 reg, u8 mask, u8 val) { int status; u8 tmp; status = f81232_get_register(port, reg, &tmp); if (status) return status; tmp = (tmp & ~mask) | (val & mask); return f81232_set_register(port, reg, tmp); } static void f81232_read_msr(struct usb_serial_port *port) { int status; u8 current_msr; struct tty_struct *tty; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); status = f81232_get_register(port, MODEM_STATUS_REGISTER, ¤t_msr); if (status) { dev_err(&port->dev, "%s fail, status: %d\n", __func__, status); mutex_unlock(&priv->lock); return; } if (!(current_msr & UART_MSR_ANY_DELTA)) { mutex_unlock(&priv->lock); return; } priv->modem_status = current_msr; if (current_msr & UART_MSR_DCTS) port->icount.cts++; if (current_msr & UART_MSR_DDSR) port->icount.dsr++; if (current_msr & UART_MSR_TERI) port->icount.rng++; if (current_msr & UART_MSR_DDCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { usb_serial_handle_dcd_change(port, tty, current_msr & UART_MSR_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); mutex_unlock(&priv->lock); } static int f81232_set_mctrl(struct usb_serial_port *port, unsigned int set, unsigned int clear) { u8 val; int status; struct f81232_private *priv = usb_get_serial_port_data(port); if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) return 0; /* no change */ /* 'set' takes precedence over 'clear' */ clear &= ~set; /* force enable interrupt with OUT2 */ mutex_lock(&priv->lock); val = UART_MCR_OUT2 | priv->modem_control; if (clear & TIOCM_DTR) val &= ~UART_MCR_DTR; if (clear & TIOCM_RTS) val &= ~UART_MCR_RTS; if (set & TIOCM_DTR) val |= UART_MCR_DTR; if (set & TIOCM_RTS) val |= UART_MCR_RTS; dev_dbg(&port->dev, "%s new:%02x old:%02x\n", __func__, val, priv->modem_control); status = f81232_set_register(port, MODEM_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s set MCR status < 0\n", __func__); mutex_unlock(&priv->lock); return status; } priv->modem_control = val; mutex_unlock(&priv->lock); return 0; } static void f81232_update_line_status(struct usb_serial_port *port, unsigned char *data, size_t actual_length) { struct f81232_private *priv = usb_get_serial_port_data(port); if (!actual_length) return; switch (data[0] & 0x07) { case 0x00: /* msr change */ dev_dbg(&port->dev, "IIR: MSR Change: %02x\n", data[0]); schedule_work(&priv->interrupt_work); break; case 0x02: /* tx-empty */ break; case 0x04: /* rx data available */ break; case 0x06: /* lsr change */ /* we can forget it. the LSR will read from bulk-in */ dev_dbg(&port->dev, "IIR: LSR Change: %02x\n", data[0]); break; } } static void f81232_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int actual_length = urb->actual_length; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, urb->transfer_buffer); f81232_update_line_status(port, data, actual_length); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } static char f81232_handle_lsr(struct usb_serial_port *port, u8 lsr) { struct f81232_private *priv = usb_get_serial_port_data(port); char tty_flag = TTY_NORMAL; if (!(lsr & UART_LSR_BRK_ERROR_BITS)) return tty_flag; if (lsr & UART_LSR_BI) { tty_flag = TTY_BREAK; port->icount.brk++; usb_serial_handle_break(port); } else if (lsr & UART_LSR_PE) { tty_flag = TTY_PARITY; port->icount.parity++; } else if (lsr & UART_LSR_FE) { tty_flag = TTY_FRAME; port->icount.frame++; } if (lsr & UART_LSR_OE) { port->icount.overrun++; schedule_work(&priv->lsr_work); tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } return tty_flag; } static void f81232_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; /* * When opening the port we get a 1-byte packet with the current LSR, * which we discard. */ if ((urb->actual_length < 2) || (urb->actual_length % 2)) return; /* bulk-in data: [LSR(1Byte)+DATA(1Byte)][LSR(1Byte)+DATA(1Byte)]... */ for (i = 0; i < urb->actual_length; i += 2) { lsr = data[i]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { if (usb_serial_handle_sysrq_char(port, data[i + 1])) continue; } tty_insert_flip_char(&port->port, data[i + 1], tty_flag); } tty_flip_buffer_push(&port->port); } static void f81534a_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; u8 len; if (urb->actual_length < 3) { dev_err(&port->dev, "short message received: %d\n", urb->actual_length); return; } len = data[0]; if (len != urb->actual_length) { dev_err(&port->dev, "malformed message received: %d (%d)\n", urb->actual_length, len); return; } /* bulk-in data: [LEN][Data.....][LSR] */ lsr = data[len - 1]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { for (i = 1; i < len - 1; ++i) { if (!usb_serial_handle_sysrq_char(port, data[i])) { tty_insert_flip_char(&port->port, data[i], tty_flag); } } } else { tty_insert_flip_string_fixed_flag(&port->port, &data[1], tty_flag, len - 2); } tty_flip_buffer_push(&port->port); } static int f81232_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); int status; mutex_lock(&priv->lock); if (break_state) priv->shadow_lcr |= UART_LCR_SBC; else priv->shadow_lcr &= ~UART_LCR_SBC; status = f81232_set_register(port, LINE_CONTROL_REGISTER, priv->shadow_lcr); if (status) dev_err(&port->dev, "set break failed: %d\n", status); mutex_unlock(&priv->lock); return status; } static int f81232_find_clk(speed_t baudrate) { int idx; for (idx = 0; idx < ARRAY_SIZE(baudrate_table); ++idx) { if (baudrate <= baudrate_table[idx] && baudrate_table[idx] % baudrate == 0) return idx; } return -EINVAL; } static void f81232_set_baudrate(struct tty_struct *tty, struct usb_serial_port *port, speed_t baudrate, speed_t old_baudrate) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 lcr; int divisor; int status = 0; int i; int idx; speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { baudrate = baud_list[i]; if (baudrate == 0) { tty_encode_baud_rate(tty, 0, 0); return; } idx = f81232_find_clk(baudrate); if (idx >= 0) { tty_encode_baud_rate(tty, baudrate, baudrate); break; } } if (idx < 0) return; priv->baud_base = baudrate_table[idx]; divisor = calc_baud_divisor(baudrate, priv->baud_base); status = f81232_set_mask_register(port, F81232_CLK_REGISTER, F81232_CLK_MASK, clock_table[idx]); if (status) { dev_err(&port->dev, "%s failed to set CLK_REG: %d\n", __func__, status); return; } status = f81232_get_register(port, LINE_CONTROL_REGISTER, &lcr); /* get LCR */ if (status) { dev_err(&port->dev, "%s failed to get LCR: %d\n", __func__, status); return; } status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr | UART_LCR_DLAB); /* Enable DLAB */ if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); return; } status = f81232_set_register(port, RECEIVE_BUFFER_REGISTER, divisor & 0x00ff); /* low */ if (status) { dev_err(&port->dev, "%s failed to set baudrate MSB: %d\n", __func__, status); goto reapply_lcr; } status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, (divisor & 0xff00) >> 8); /* high */ if (status) { dev_err(&port->dev, "%s failed to set baudrate LSB: %d\n", __func__, status); } reapply_lcr: status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr & ~UART_LCR_DLAB); if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); } } static int f81232_port_enable(struct usb_serial_port *port) { u8 val; int status; /* fifo on, trigger8, clear TX/RX*/ val = UART_FCR_TRIGGER_8 | UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT; status = f81232_set_register(port, FIFO_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s failed to set FCR: %d\n", __func__, status); return status; } /* MSR Interrupt only, LSR will read from Bulk-in odd byte */ status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, UART_IER_MSI); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static int f81232_port_disable(struct usb_serial_port *port) { int status; status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, 0); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static void f81232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 new_lcr = 0; int status = 0; speed_t baudrate; speed_t old_baud; /* Don't change anything if nothing has changed */ if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios)) return; if (C_BAUD(tty) == B0) f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); baudrate = tty_get_baud_rate(tty); if (baudrate > 0) { if (old_termios) old_baud = tty_termios_baud_rate(old_termios); else old_baud = F81232_DEF_BAUDRATE; f81232_set_baudrate(tty, port, baudrate, old_baud); } if (C_PARENB(tty)) { new_lcr |= UART_LCR_PARITY; if (!C_PARODD(tty)) new_lcr |= UART_LCR_EPAR; if (C_CMSPAR(tty)) new_lcr |= UART_LCR_SPAR; } if (C_CSTOPB(tty)) new_lcr |= UART_LCR_STOP; new_lcr |= UART_LCR_WLEN(tty_get_char_size(tty->termios.c_cflag)); mutex_lock(&priv->lock); new_lcr |= (priv->shadow_lcr & UART_LCR_SBC); status = f81232_set_register(port, LINE_CONTROL_REGISTER, new_lcr); if (status) { dev_err(&port->dev, "%s failed to set LCR: %d\n", __func__, status); } priv->shadow_lcr = new_lcr; mutex_unlock(&priv->lock); } static int f81232_tiocmget(struct tty_struct *tty) { int r; struct usb_serial_port *port = tty->driver_data; struct f81232_private *port_priv = usb_get_serial_port_data(port); u8 mcr, msr; /* force get current MSR changed state */ f81232_read_msr(port); mutex_lock(&port_priv->lock); mcr = port_priv->modem_control; msr = port_priv->modem_status; mutex_unlock(&port_priv->lock); r = (mcr & UART_MCR_DTR ? TIOCM_DTR : 0) | (mcr & UART_MCR_RTS ? TIOCM_RTS : 0) | (msr & UART_MSR_CTS ? TIOCM_CTS : 0) | (msr & UART_MSR_DCD ? TIOCM_CAR : 0) | (msr & UART_MSR_RI ? TIOCM_RI : 0) | (msr & UART_MSR_DSR ? TIOCM_DSR : 0); return r; } static int f81232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return f81232_set_mctrl(port, set, clear); } static int f81232_open(struct tty_struct *tty, struct usb_serial_port *port) { int result; result = f81232_port_enable(port); if (result) return result; /* Setup termios */ if (tty) f81232_set_termios(tty, port, NULL); result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); return result; } result = usb_serial_generic_open(tty, port); if (result) { usb_kill_urb(port->interrupt_in_urb); return result; } return 0; } static int f81534a_open(struct tty_struct *tty, struct usb_serial_port *port) { int status; u8 mask; u8 val; val = F81534A_TRIGGER_MULTIPLE_4X | F81534A_FIFO_128BYTE; mask = F81534A_TRIGGER_MASK | F81534A_FIFO_128BYTE; status = f81232_set_mask_register(port, F81534A_MODE_REG, mask, val); if (status) { dev_err(&port->dev, "failed to set MODE_REG: %d\n", status); return status; } return f81232_open(tty, port); } static void f81232_close(struct usb_serial_port *port) { struct f81232_private *port_priv = usb_get_serial_port_data(port); f81232_port_disable(port); usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } static void f81232_dtr_rts(struct usb_serial_port *port, int on) { if (on) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); else f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); } static bool f81232_tx_empty(struct usb_serial_port *port) { int status; u8 tmp; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (!status) { if ((tmp & UART_LSR_TEMT) != UART_LSR_TEMT) return false; } return true; } static int f81232_carrier_raised(struct usb_serial_port *port) { u8 msr; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); msr = priv->modem_status; mutex_unlock(&priv->lock); if (msr & UART_MSR_DCD) return 1; return 0; } static void f81232_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); ss->baud_base = priv->baud_base; } static void f81232_interrupt_work(struct work_struct *work) { struct f81232_private *priv = container_of(work, struct f81232_private, interrupt_work); f81232_read_msr(priv->port); } static void f81232_lsr_worker(struct work_struct *work) { struct f81232_private *priv; struct usb_serial_port *port; int status; u8 tmp; priv = container_of(work, struct f81232_private, lsr_work); port = priv->port; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (status) dev_warn(&port->dev, "read LSR failed: %d\n", status); } static int f81534a_ctrl_set_register(struct usb_interface *intf, u16 reg, u16 size, void *val) { struct usb_device *dev = interface_to_usbdev(intf); int retry = F81534A_ACCESS_REG_RETRY; int status; while (retry--) { status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, val, size, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { status = usb_translate_errors(status); if (status == -EIO) continue; } break; } if (status) { dev_err(&intf->dev, "failed to set register 0x%x: %d\n", reg, status); } return status; } static int f81534a_ctrl_enable_all_ports(struct usb_interface *intf, bool en) { unsigned char enable[2] = {0}; int status; /* * Enable all available serial ports, define as following: * bit 15 : Reset behavior (when HUB got soft reset) * 0: maintain all serial port enabled state. * 1: disable all serial port. * bit 0~11 : Serial port enable bit. */ if (en) { enable[0] = 0xff; enable[1] = 0x8f; } status = f81534a_ctrl_set_register(intf, F81534A_CTRL_CMD_ENABLE_PORT, sizeof(enable), enable); if (status) dev_err(&intf->dev, "failed to enable ports: %d\n", status); return status; } static int f81534a_ctrl_probe(struct usb_interface *intf, const struct usb_device_id *id) { return f81534a_ctrl_enable_all_ports(intf, true); } static void f81534a_ctrl_disconnect(struct usb_interface *intf) { f81534a_ctrl_enable_all_ports(intf, false); } static int f81534a_ctrl_resume(struct usb_interface *intf) { return f81534a_ctrl_enable_all_ports(intf, true); } static int f81232_port_probe(struct usb_serial_port *port) { struct f81232_private *priv; priv = devm_kzalloc(&port->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->lock); INIT_WORK(&priv->interrupt_work, f81232_interrupt_work); INIT_WORK(&priv->lsr_work, f81232_lsr_worker); usb_set_serial_port_data(port, priv); priv->port = port; return 0; } static int f81534a_port_probe(struct usb_serial_port *port) { int status; /* tri-state with pull-high, default RS232 Mode */ status = f81232_set_register(port, F81534A_GPIO_REG, F81534A_GPIO_MODE2_DIR); if (status) return status; return f81232_port_probe(port); } static int f81232_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_serial_port *port = serial->port[0]; struct f81232_private *port_priv = usb_get_serial_port_data(port); int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_kill_urb(port->read_urbs[i]); usb_kill_urb(port->interrupt_in_urb); if (port_priv) { flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } return 0; } static int f81232_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; int result; if (tty_port_initialized(&port->port)) { result = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); if (result) { dev_err(&port->dev, "submit interrupt urb failed: %d\n", result); return result; } } return usb_serial_generic_resume(serial); } static struct usb_serial_driver f81232_device = { .driver = { .name = "f81232", }, .id_table = f81232_id_table, .num_ports = 1, .bulk_in_size = 256, .bulk_out_size = 256, .open = f81232_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81232_process_read_urb, .read_int_callback = f81232_read_int_callback, .port_probe = f81232_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver f81534a_device = { .driver = { .name = "f81534a", }, .id_table = f81534a_id_table, .num_ports = 1, .open = f81534a_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81534a_process_read_urb, .read_int_callback = f81232_read_int_callback, .port_probe = f81534a_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &f81232_device, &f81534a_device, NULL, }; static struct usb_driver f81534a_ctrl_driver = { .name = "f81534a_ctrl", .id_table = f81534a_ctrl_id_table, .probe = f81534a_ctrl_probe, .disconnect = f81534a_ctrl_disconnect, .resume = f81534a_ctrl_resume, }; static int __init f81232_init(void) { int status; status = usb_register_driver(&f81534a_ctrl_driver, THIS_MODULE, KBUILD_MODNAME); if (status) return status; status = usb_serial_register_drivers(serial_drivers, KBUILD_MODNAME, combined_id_table); if (status) { usb_deregister(&f81534a_ctrl_driver); return status; } return 0; } static void __exit f81232_exit(void) { usb_serial_deregister_drivers(serial_drivers); usb_deregister(&f81534a_ctrl_driver); } module_init(f81232_init); module_exit(f81232_exit); MODULE_DESCRIPTION("Fintek F81232/532A/534A/535/536 USB to serial driver"); MODULE_AUTHOR("Greg Kroah-Hartman <gregkh@linuxfoundation.org>"); MODULE_AUTHOR("Peter Hong <peter_hong@fintek.com.tw>"); MODULE_LICENSE("GPL v2"); |
| 108 105 4 67 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | // SPDX-License-Identifier: GPL-2.0-only /* loopback transport for vsock using virtio_transport_common APIs * * Copyright (C) 2013-2019 Red Hat, Inc. * Authors: Asias He <asias@redhat.com> * Stefan Hajnoczi <stefanha@redhat.com> * Stefano Garzarella <sgarzare@redhat.com> * */ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/list.h> #include <linux/virtio_vsock.h> struct vsock_loopback { struct workqueue_struct *workqueue; struct sk_buff_head pkt_queue; struct work_struct pkt_work; }; static struct vsock_loopback the_vsock_loopback; static u32 vsock_loopback_get_local_cid(void) { return VMADDR_CID_LOCAL; } static int vsock_loopback_send_pkt(struct sk_buff *skb) { struct vsock_loopback *vsock = &the_vsock_loopback; int len = skb->len; virtio_vsock_skb_queue_tail(&vsock->pkt_queue, skb); queue_work(vsock->workqueue, &vsock->pkt_work); return len; } static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) { struct vsock_loopback *vsock = &the_vsock_loopback; virtio_transport_purge_skbs(vsk, &vsock->pkt_queue); return 0; } static bool vsock_loopback_seqpacket_allow(u32 remote_cid); static bool vsock_loopback_msgzerocopy_allow(void) { return true; } static struct virtio_transport loopback_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = vsock_loopback_get_local_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, .cancel_pkt = vsock_loopback_cancel_pkt, .dgram_bind = virtio_transport_dgram_bind, .dgram_dequeue = virtio_transport_dgram_dequeue, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_allow = virtio_transport_dgram_allow, .stream_dequeue = virtio_transport_stream_dequeue, .stream_enqueue = virtio_transport_stream_enqueue, .stream_has_data = virtio_transport_stream_has_data, .stream_has_space = virtio_transport_stream_has_space, .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, .seqpacket_dequeue = virtio_transport_seqpacket_dequeue, .seqpacket_enqueue = virtio_transport_seqpacket_enqueue, .seqpacket_allow = vsock_loopback_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, .msgzerocopy_allow = vsock_loopback_msgzerocopy_allow, .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, .notify_send_init = virtio_transport_notify_send_init, .notify_send_pre_block = virtio_transport_notify_send_pre_block, .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .unsent_bytes = virtio_transport_unsent_bytes, .read_skb = virtio_transport_read_skb, }, .send_pkt = vsock_loopback_send_pkt, }; static bool vsock_loopback_seqpacket_allow(u32 remote_cid) { return true; } static void vsock_loopback_work(struct work_struct *work) { struct vsock_loopback *vsock = container_of(work, struct vsock_loopback, pkt_work); struct sk_buff_head pkts; struct sk_buff *skb; skb_queue_head_init(&pkts); spin_lock_bh(&vsock->pkt_queue.lock); skb_queue_splice_init(&vsock->pkt_queue, &pkts); spin_unlock_bh(&vsock->pkt_queue.lock); while ((skb = __skb_dequeue(&pkts))) { /* Decrement the bytes_unsent counter without deallocating skb * It is freed by the receiver. */ virtio_transport_consume_skb_sent(skb, false); virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&loopback_transport, skb); } } static int __init vsock_loopback_init(void) { struct vsock_loopback *vsock = &the_vsock_loopback; int ret; vsock->workqueue = alloc_workqueue("vsock-loopback", WQ_PERCPU, 0); if (!vsock->workqueue) return -ENOMEM; skb_queue_head_init(&vsock->pkt_queue); INIT_WORK(&vsock->pkt_work, vsock_loopback_work); ret = vsock_core_register(&loopback_transport.transport, VSOCK_TRANSPORT_F_LOCAL); if (ret) goto out_wq; return 0; out_wq: destroy_workqueue(vsock->workqueue); return ret; } static void __exit vsock_loopback_exit(void) { struct vsock_loopback *vsock = &the_vsock_loopback; vsock_core_unregister(&loopback_transport.transport); flush_work(&vsock->pkt_work); virtio_vsock_skb_queue_purge(&vsock->pkt_queue); destroy_workqueue(vsock->workqueue); } module_init(vsock_loopback_init); module_exit(vsock_loopback_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Stefano Garzarella <sgarzare@redhat.com>"); MODULE_DESCRIPTION("loopback transport for vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); |
| 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SPECCTRL_H_ #define _ASM_X86_SPECCTRL_H_ #include <linux/thread_info.h> #include <asm/nospec-branch.h> #include <asm/msr.h> /* * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR * the guest has, while on VMEXIT we restore the host view. This * would be easier if SPEC_CTRL were architecturally maskable or * shadowable for guests but this is not (currently) the case. * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL * (may get translated to MSR_AMD64_LS_CFG bits) * * Avoids writing to the MSR if the content/bits are the same */ static inline void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** * x86_spec_ctrl_restore_host - Restore host speculation control registers * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL * (may get translated to MSR_AMD64_LS_CFG bits) * * Avoids writing to the MSR if the content/bits are the same */ static inline void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ extern u64 x86_amd_ls_cfg_base; extern u64 x86_amd_ls_cfg_ssbd_mask; static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) { BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); } static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); } static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } /* * This can be used in noinstr functions & should only be called in bare * metal context. */ static __always_inline void __update_spec_ctrl(u64 val) { __this_cpu_write(x86_spec_ctrl_current, val); native_wrmsrq(MSR_IA32_SPEC_CTRL, val); } #ifdef CONFIG_SMP extern void speculative_store_bypass_ht_init(void); #else static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); extern void speculation_ctrl_update_current(void); extern bool itlb_multihit_kvm_mitigation; #endif |
| 19 20 20 13 165 139 15 6 11 9 16 1 21 5 8 4 13 4 15 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_script.c * * Copyright (C) 1996 Martin von Löwis * original #!-checking implemented by tytso. */ #include <linux/module.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/binfmts.h> #include <linux/init.h> #include <linux/file.h> #include <linux/err.h> #include <linux/fs.h> static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } static inline const char *next_non_spacetab(const char *first, const char *last) { for (; first <= last; first++) if (!spacetab(*first)) return first; return NULL; } static inline const char *next_terminator(const char *first, const char *last) { for (; first <= last; first++) if (spacetab(*first) || !*first) return first; return NULL; } static int load_script(struct linux_binprm *bprm) { const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; struct file *file; int retval; /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section handles parsing the #! line into separate * interpreter path and argument strings. We must be careful * because bprm->buf is not yet guaranteed to be NUL-terminated * (though the buffer will have trailing NUL padding when the * file size was smaller than the buffer size). * * We do not want to exec a truncated interpreter path, so either * we find a newline (which indicates nothing is truncated), or * we find a space/tab/NUL after the interpreter path (which * itself may be preceded by spaces/tabs). Truncating the * arguments is fine: the interpreter can re-read the script to * parse them on its own. */ buf_end = bprm->buf + sizeof(bprm->buf) - 1; i_end = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); if (!i_end) { i_end = next_non_spacetab(bprm->buf + 2, buf_end); if (!i_end) return -ENOEXEC; /* Entire buf is spaces/tabs */ /* * If there is no later space/tab/NUL we must assume the * interpreter path is truncated. */ if (!next_terminator(i_end, buf_end)) return -ENOEXEC; i_end = buf_end; } /* Trim any trailing spaces/tabs from i_end */ while (spacetab(i_end[-1])) i_end--; /* Skip over leading spaces/tabs */ i_name = next_non_spacetab(bprm->buf+2, i_end); if (!i_name || (i_name == i_end)) return -ENOEXEC; /* No interpreter name found */ /* Is there an optional argument? */ i_arg = NULL; i_sep = next_terminator(i_name, i_end); if (i_sep && (*i_sep != '\0')) i_arg = next_non_spacetab(i_sep, i_end); /* * If the script filename will be inaccessible after exec, typically * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give * up now (on the assumption that the interpreter will want to load * this file). */ if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; /* * OK, we've parsed out the interpreter name and * (optional) argument. * Splice in (1) the interpreter's name for argv[0] * (2) (optional) argument to interpreter * (3) filename of shell script (replace argv[0]) * * This is done in reverse order, because of how the * user environment and arguments are stored. */ retval = remove_arg_zero(bprm); if (retval) return retval; retval = copy_string_kernel(bprm->interp, bprm); if (retval < 0) return retval; bprm->argc++; *((char *)i_end) = '\0'; if (i_arg) { *((char *)i_sep) = '\0'; retval = copy_string_kernel(i_arg, bprm); if (retval < 0) return retval; bprm->argc++; } retval = copy_string_kernel(i_name, bprm); if (retval) return retval; bprm->argc++; retval = bprm_change_interp(i_name, bprm); if (retval < 0) return retval; /* * OK, now restart the process with the interpreter's dentry. */ file = open_exec(i_name); if (IS_ERR(file)) return PTR_ERR(file); bprm->interpreter = file; return 0; } static struct linux_binfmt script_format = { .module = THIS_MODULE, .load_binary = load_script, }; static int __init init_script_binfmt(void) { register_binfmt(&script_format); return 0; } static void __exit exit_script_binfmt(void) { unregister_binfmt(&script_format); } core_initcall(init_script_binfmt); module_exit(exit_script_binfmt); MODULE_DESCRIPTION("Kernel support for scripts starting with #!"); MODULE_LICENSE("GPL"); |
| 310 312 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | // SPDX-License-Identifier: GPL-2.0 /* Lock down the kernel * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #include <linux/security.h> #include <linux/export.h> #include <linux/lsm_hooks.h> #include <uapi/linux/lsm.h> static enum lockdown_reason kernel_locked_down; static const enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_CONFIDENTIALITY_MAX}; /* * Put the kernel into lock-down mode. */ static int lock_kernel_down(const char *where, enum lockdown_reason level) { if (kernel_locked_down >= level) return -EPERM; kernel_locked_down = level; pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n", where); return 0; } static int __init lockdown_param(char *level) { if (!level) return -EINVAL; if (strcmp(level, "integrity") == 0) lock_kernel_down("command line", LOCKDOWN_INTEGRITY_MAX); else if (strcmp(level, "confidentiality") == 0) lock_kernel_down("command line", LOCKDOWN_CONFIDENTIALITY_MAX); else return -EINVAL; return 0; } early_param("lockdown", lockdown_param); /** * lockdown_is_locked_down - Find out if the kernel is locked down * @what: Tag to use in notice generated if lockdown is in effect */ static int lockdown_is_locked_down(enum lockdown_reason what) { if (WARN(what >= LOCKDOWN_CONFIDENTIALITY_MAX, "Invalid lockdown reason")) return -EPERM; if (kernel_locked_down >= what) { if (lockdown_reasons[what]) pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", current->comm, lockdown_reasons[what]); return -EPERM; } return 0; } static struct security_hook_list lockdown_hooks[] __ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), }; static const struct lsm_id lockdown_lsmid = { .name = "lockdown", .id = LSM_ID_LOCKDOWN, }; static int __init lockdown_lsm_init(void) { #if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY) lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX); #elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY) lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX); #endif security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks), &lockdown_lsmid); return 0; } static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { char temp[80] = ""; int i, offset = 0; for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; if (lockdown_reasons[level]) { const char *label = lockdown_reasons[level]; if (kernel_locked_down == level) offset += sprintf(temp+offset, "[%s] ", label); else offset += sprintf(temp+offset, "%s ", label); } } /* Convert the last space to a newline if needed. */ if (offset > 0) temp[offset-1] = '\n'; return simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); } static ssize_t lockdown_write(struct file *file, const char __user *buf, size_t n, loff_t *ppos) { char *state; int i, len, err = -EINVAL; state = memdup_user_nul(buf, n); if (IS_ERR(state)) return PTR_ERR(state); len = strlen(state); if (len && state[len-1] == '\n') { state[len-1] = '\0'; len--; } for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; const char *label = lockdown_reasons[level]; if (label && !strcmp(state, label)) err = lock_kernel_down("securityfs", level); } kfree(state); return err ? err : n; } static const struct file_operations lockdown_ops = { .read = lockdown_read, .write = lockdown_write, }; static int __init lockdown_secfs_init(void) { struct dentry *dentry; dentry = securityfs_create_file("lockdown", 0644, NULL, NULL, &lockdown_ops); return PTR_ERR_OR_ZERO(dentry); } core_initcall(lockdown_secfs_init); #ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY DEFINE_EARLY_LSM(lockdown) = { #else DEFINE_LSM(lockdown) = { #endif .name = "lockdown", .init = lockdown_lsm_init, }; |
| 23 2 25 16 86 86 27 27 11 27 16 16 86 86 86 41 45 45 45 6916 6912 6916 6918 1245 1247 1248 6963 6956 1271 10 2 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 | // SPDX-License-Identifier: GPL-2.0 /* * linux/mm/mempool.c * * memory buffer pool support. Such pools are mostly used * for guaranteed, deadlock-free memory allocations during * extreme VM load. * * started by Ingo Molnar, Copyright (C) 2001 * debugging by David Rientjes, Copyright (C) 2015 */ #include <linux/mm.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/kasan.h> #include <linux/kmemleak.h> #include <linux/export.h> #include <linux/mempool.h> #include <linux/writeback.h> #include "slab.h" #ifdef CONFIG_SLUB_DEBUG_ON static void poison_error(mempool_t *pool, void *element, size_t size, size_t byte) { const int nr = pool->curr_nr; const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0); const int end = min_t(int, byte + (BITS_PER_LONG / 8), size); int i; pr_err("BUG: mempool element poison mismatch\n"); pr_err("Mempool %p size %zu\n", pool, size); pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : ""); for (i = start; i < end; i++) pr_cont("%x ", *(u8 *)(element + i)); pr_cont("%s\n", end < size ? "..." : ""); dump_stack(); } static void __check_element(mempool_t *pool, void *element, size_t size) { u8 *obj = element; size_t i; for (i = 0; i < size; i++) { u8 exp = (i < size - 1) ? POISON_FREE : POISON_END; if (obj[i] != exp) { poison_error(pool, element, size, i); return; } } memset(obj, POISON_INUSE, size); } static void check_element(mempool_t *pool, void *element) { /* Skip checking: KASAN might save its metadata in the element. */ if (kasan_enabled()) return; /* Mempools backed by slab allocator */ if (pool->free == mempool_kfree) { __check_element(pool, element, (size_t)pool->pool_data); } else if (pool->free == mempool_free_slab) { __check_element(pool, element, kmem_cache_size(pool->pool_data)); } else if (pool->free == mempool_free_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; void *addr = kmap_local_page((struct page *)element); __check_element(pool, addr, 1UL << (PAGE_SHIFT + order)); kunmap_local(addr); } } static void __poison_element(void *element, size_t size) { u8 *obj = element; memset(obj, POISON_FREE, size - 1); obj[size - 1] = POISON_END; } static void poison_element(mempool_t *pool, void *element) { /* Skip poisoning: KASAN might save its metadata in the element. */ if (kasan_enabled()) return; /* Mempools backed by slab allocator */ if (pool->alloc == mempool_kmalloc) { __poison_element(element, (size_t)pool->pool_data); } else if (pool->alloc == mempool_alloc_slab) { __poison_element(element, kmem_cache_size(pool->pool_data)); } else if (pool->alloc == mempool_alloc_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; void *addr = kmap_local_page((struct page *)element); __poison_element(addr, 1UL << (PAGE_SHIFT + order)); kunmap_local(addr); } } #else /* CONFIG_SLUB_DEBUG_ON */ static inline void check_element(mempool_t *pool, void *element) { } static inline void poison_element(mempool_t *pool, void *element) { } #endif /* CONFIG_SLUB_DEBUG_ON */ static __always_inline bool kasan_poison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) return kasan_mempool_poison_object(element); else if (pool->alloc == mempool_alloc_pages) return kasan_mempool_poison_pages(element, (unsigned long)pool->pool_data); return true; } static void kasan_unpoison_element(mempool_t *pool, void *element) { if (pool->alloc == mempool_kmalloc) kasan_mempool_unpoison_object(element, (size_t)pool->pool_data); else if (pool->alloc == mempool_alloc_slab) kasan_mempool_unpoison_object(element, kmem_cache_size(pool->pool_data)); else if (pool->alloc == mempool_alloc_pages) kasan_mempool_unpoison_pages(element, (unsigned long)pool->pool_data); } static __always_inline void add_element(mempool_t *pool, void *element) { BUG_ON(pool->min_nr != 0 && pool->curr_nr >= pool->min_nr); poison_element(pool, element); if (kasan_poison_element(pool, element)) pool->elements[pool->curr_nr++] = element; } static void *remove_element(mempool_t *pool) { void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); kasan_unpoison_element(pool, element); check_element(pool, element); return element; } /** * mempool_exit - exit a mempool initialized with mempool_init() * @pool: pointer to the memory pool which was initialized with * mempool_init(). * * Free all reserved elements in @pool and @pool itself. This function * only sleeps if the free_fn() function sleeps. * * May be called on a zeroed but uninitialized mempool (i.e. allocated with * kzalloc()). */ void mempool_exit(mempool_t *pool) { while (pool->curr_nr) { void *element = remove_element(pool); pool->free(element, pool->pool_data); } kfree(pool->elements); pool->elements = NULL; } EXPORT_SYMBOL(mempool_exit); /** * mempool_destroy - deallocate a memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * Free all reserved elements in @pool and @pool itself. This function * only sleeps if the free_fn() function sleeps. */ void mempool_destroy(mempool_t *pool) { if (unlikely(!pool)) return; mempool_exit(pool); kfree(pool); } EXPORT_SYMBOL(mempool_destroy); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { spin_lock_init(&pool->lock); pool->min_nr = min_nr; pool->pool_data = pool_data; pool->alloc = alloc_fn; pool->free = free_fn; init_waitqueue_head(&pool->wait); /* * max() used here to ensure storage for at least 1 element to support * zero minimum pool */ pool->elements = kmalloc_array_node(max(1, min_nr), sizeof(void *), gfp_mask, node_id); if (!pool->elements) return -ENOMEM; /* * First pre-allocate the guaranteed number of buffers, * also pre-allocate 1 element for zero minimum pool. */ while (pool->curr_nr < max(1, pool->min_nr)) { void *element; element = pool->alloc(gfp_mask, pool->pool_data); if (unlikely(!element)) { mempool_exit(pool); return -ENOMEM; } add_element(pool, element); } return 0; } EXPORT_SYMBOL(mempool_init_node); /** * mempool_init - initialize a memory pool * @pool: pointer to the memory pool that should be initialized * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * * Like mempool_create(), but initializes the pool in (i.e. embedded in another * structure). * * Return: %0 on success, negative error code otherwise. */ int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { return mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, GFP_KERNEL, NUMA_NO_NODE); } EXPORT_SYMBOL(mempool_init_noprof); /** * mempool_create_node - create a memory pool * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * @gfp_mask: memory allocation flags * @node_id: numa node to allocate on * * this function creates and allocates a guaranteed size, preallocated * memory pool. The pool can be used from the mempool_alloc() and mempool_free() * functions. This function might sleep. Both the alloc_fn() and the free_fn() * functions might sleep - as long as the mempool_alloc() function is not called * from IRQ contexts. * * Return: pointer to the created memory pool object or %NULL on error. */ mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { mempool_t *pool; pool = kmalloc_node_noprof(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id); if (!pool) return NULL; if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, gfp_mask, node_id)) { kfree(pool); return NULL; } return pool; } EXPORT_SYMBOL(mempool_create_node_noprof); /** * mempool_resize - resize an existing memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @new_min_nr: the new minimum number of elements guaranteed to be * allocated for this pool. * * This function shrinks/grows the pool. In the case of growing, * it cannot be guaranteed that the pool will be grown to the new * size immediately, but new mempool_free() calls will refill it. * This function may sleep. * * Note, the caller must guarantee that no mempool_destroy is called * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. * * Return: %0 on success, negative error code otherwise. */ int mempool_resize(mempool_t *pool, int new_min_nr) { void *element; void **new_elements; unsigned long flags; BUG_ON(new_min_nr <= 0); might_sleep(); spin_lock_irqsave(&pool->lock, flags); if (new_min_nr <= pool->min_nr) { while (new_min_nr < pool->curr_nr) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); spin_lock_irqsave(&pool->lock, flags); } pool->min_nr = new_min_nr; goto out_unlock; } spin_unlock_irqrestore(&pool->lock, flags); /* Grow the pool */ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), GFP_KERNEL); if (!new_elements) return -ENOMEM; spin_lock_irqsave(&pool->lock, flags); if (unlikely(new_min_nr <= pool->min_nr)) { /* Raced, other resize will do our work */ spin_unlock_irqrestore(&pool->lock, flags); kfree(new_elements); goto out; } memcpy(new_elements, pool->elements, pool->curr_nr * sizeof(*new_elements)); kfree(pool->elements); pool->elements = new_elements; pool->min_nr = new_min_nr; while (pool->curr_nr < pool->min_nr) { spin_unlock_irqrestore(&pool->lock, flags); element = pool->alloc(GFP_KERNEL, pool->pool_data); if (!element) goto out; spin_lock_irqsave(&pool->lock, flags); if (pool->curr_nr < pool->min_nr) { add_element(pool, element); } else { spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); /* Raced */ goto out; } } out_unlock: spin_unlock_irqrestore(&pool->lock, flags); out: return 0; } EXPORT_SYMBOL(mempool_resize); /** * mempool_alloc - allocate an element from a specific memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @gfp_mask: the usual allocation bitmask. * * this function only sleeps if the alloc_fn() function sleeps or * returns NULL. Note that due to preallocation, this function * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) * Note: using __GFP_ZERO is not supported. * * Return: pointer to the allocated element or %NULL on error. */ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; wait_queue_entry_t wait; gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); might_alloc(gfp_mask); gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ gfp_mask |= __GFP_NOWARN; /* failures are OK */ gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); repeat_alloc: element = pool->alloc(gfp_temp, pool->pool_data); if (likely(element != NULL)) return element; spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr)) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); /* paired with rmb in mempool_free(), read comment there */ smp_wmb(); /* * Update the allocation stack trace as this is more useful * for debugging. */ kmemleak_update_trace(element); return element; } /* * We use gfp mask w/o direct reclaim or IO for the first round. If * alloc failed with that and @pool was empty, retry immediately. */ if (gfp_temp != gfp_mask) { spin_unlock_irqrestore(&pool->lock, flags); gfp_temp = gfp_mask; goto repeat_alloc; } /* We must not sleep if !__GFP_DIRECT_RECLAIM */ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { spin_unlock_irqrestore(&pool->lock, flags); return NULL; } /* Let's wait for someone else to return an element to @pool */ init_wait(&wait); prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(&pool->lock, flags); /* * FIXME: this should be io_schedule(). The timeout is there as a * workaround for some DM problems in 2.6.18. */ io_schedule_timeout(5*HZ); finish_wait(&pool->wait, &wait); goto repeat_alloc; } EXPORT_SYMBOL(mempool_alloc_noprof); /** * mempool_alloc_preallocated - allocate an element from preallocated elements * belonging to a specific memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * This function is similar to mempool_alloc, but it only attempts allocating * an element from the preallocated elements. It does not sleep and immediately * returns if no preallocated elements are available. * * Return: pointer to the allocated element or %NULL if no elements are * available. */ void *mempool_alloc_preallocated(mempool_t *pool) { void *element; unsigned long flags; spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr)) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); /* paired with rmb in mempool_free(), read comment there */ smp_wmb(); /* * Update the allocation stack trace as this is more useful * for debugging. */ kmemleak_update_trace(element); return element; } spin_unlock_irqrestore(&pool->lock, flags); return NULL; } EXPORT_SYMBOL(mempool_alloc_preallocated); /** * mempool_free - return an element to the pool. * @element: pool element pointer. * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * this function only sleeps if the free_fn() function sleeps. */ void mempool_free(void *element, mempool_t *pool) { unsigned long flags; if (unlikely(element == NULL)) return; /* * Paired with the wmb in mempool_alloc(). The preceding read is * for @element and the following @pool->curr_nr. This ensures * that the visible value of @pool->curr_nr is from after the * allocation of @element. This is necessary for fringe cases * where @element was passed to this task without going through * barriers. * * For example, assume @p is %NULL at the beginning and one task * performs "p = mempool_alloc(...);" while another task is doing * "while (!p) cpu_relax(); mempool_free(p, ...);". This function * may end up using curr_nr value which is from before allocation * of @p without the following rmb. */ smp_rmb(); /* * For correctness, we need a test which is guaranteed to trigger * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr * without locking achieves that and refilling as soon as possible * is desirable. * * Because curr_nr visible here is always a value after the * allocation of @element, any task which decremented curr_nr below * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets * incremented to min_nr afterwards. If curr_nr gets incremented * to min_nr after the allocation of @element, the elements * allocated after that are subject to the same guarantee. * * Waiters happen iff curr_nr is 0 and the above guarantee also * ensures that there will be frees which return elements to the * pool waking up the waiters. */ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); spin_unlock_irqrestore(&pool->lock, flags); if (wq_has_sleeper(&pool->wait)) wake_up(&pool->wait); return; } spin_unlock_irqrestore(&pool->lock, flags); } /* * Handle the min_nr = 0 edge case: * * For zero-minimum pools, curr_nr < min_nr (0 < 0) never succeeds, * so waiters sleeping on pool->wait would never be woken by the * wake-up path of previous test. This explicit check ensures the * allocation of element when both min_nr and curr_nr are 0, and * any active waiters are properly awakened. */ if (unlikely(pool->min_nr == 0 && READ_ONCE(pool->curr_nr) == 0)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr == 0)) { add_element(pool, element); spin_unlock_irqrestore(&pool->lock, flags); if (wq_has_sleeper(&pool->wait)) wake_up(&pool->wait); return; } spin_unlock_irqrestore(&pool->lock, flags); } pool->free(element, pool->pool_data); } EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; VM_BUG_ON(mem->ctor); return kmem_cache_alloc_noprof(mem, gfp_mask); } EXPORT_SYMBOL(mempool_alloc_slab); void mempool_free_slab(void *element, void *pool_data) { struct kmem_cache *mem = pool_data; kmem_cache_free(mem, element); } EXPORT_SYMBOL(mempool_free_slab); /* * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory * specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { size_t size = (size_t)pool_data; return kmalloc_noprof(size, gfp_mask); } EXPORT_SYMBOL(mempool_kmalloc); void mempool_kfree(void *element, void *pool_data) { kfree(element); } EXPORT_SYMBOL(mempool_kfree); void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data) { size_t size = (size_t)pool_data; return kvmalloc(size, gfp_mask); } EXPORT_SYMBOL(mempool_kvmalloc); void mempool_kvfree(void *element, void *pool_data) { kvfree(element); } EXPORT_SYMBOL(mempool_kvfree); /* * A simple mempool-backed page allocator that allocates pages * of the order specified by pool_data. */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) { int order = (int)(long)pool_data; return alloc_pages_noprof(gfp_mask, order); } EXPORT_SYMBOL(mempool_alloc_pages); void mempool_free_pages(void *element, void *pool_data) { int order = (int)(long)pool_data; __free_pages(element, order); } EXPORT_SYMBOL(mempool_free_pages); |
| 2 2 2 1 1 2 2 2 2 2 2 2 2 2 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 | // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Realtek RTS51xx USB card reader * * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. * * Author: * wwang (wei_wang@realsil.com.cn) * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/kernel.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <linux/cdrom.h> #include <linux/usb.h> #include <linux/slab.h> #include <linux/usb_usual.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-realtek" MODULE_DESCRIPTION("Driver for Realtek USB Card Reader"); MODULE_AUTHOR("wwang <wei_wang@realsil.com.cn>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); static int auto_delink_en = 1; module_param(auto_delink_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(auto_delink_en, "auto delink mode (0=firmware, 1=software [default])"); #ifdef CONFIG_REALTEK_AUTOPM static int ss_en = 1; module_param(ss_en, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_en, "enable selective suspend"); static int ss_delay = 50; module_param(ss_delay, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ss_delay, "seconds to delay before entering selective suspend"); enum RTS51X_STAT { RTS51X_STAT_INIT, RTS51X_STAT_IDLE, RTS51X_STAT_RUN, RTS51X_STAT_SS }; #define POLLING_INTERVAL 50 #define rts51x_set_stat(chip, stat) \ ((chip)->state = (enum RTS51X_STAT)(stat)) #define rts51x_get_stat(chip) ((chip)->state) #define SET_LUN_READY(chip, lun) ((chip)->lun_ready |= ((u8)1 << (lun))) #define CLR_LUN_READY(chip, lun) ((chip)->lun_ready &= ~((u8)1 << (lun))) #define TST_LUN_READY(chip, lun) ((chip)->lun_ready & ((u8)1 << (lun))) #endif struct rts51x_status { u16 vid; u16 pid; u8 cur_lun; u8 card_type; u8 total_lun; u16 fw_ver; u8 phy_exist; u8 multi_flag; u8 multi_card; u8 log_exist; union { u8 detailed_type1; u8 detailed_type2; } detailed_type; u8 function[2]; }; struct rts51x_chip { u16 vendor_id; u16 product_id; char max_lun; struct rts51x_status *status; int status_len; u32 flag; struct us_data *us; #ifdef CONFIG_REALTEK_AUTOPM struct timer_list rts51x_suspend_timer; unsigned long timer_expires; int pwr_state; u8 lun_ready; enum RTS51X_STAT state; int support_auto_delink; #endif /* used to back up the protocol chosen in probe1 phase */ proto_cmnd proto_handler_backup; }; /* flag definition */ #define FLIDX_AUTO_DELINK 0x01 #define SCSI_LUN(srb) ((srb)->device->lun) /* Bit Operation */ #define SET_BIT(data, idx) ((data) |= 1 << (idx)) #define CLR_BIT(data, idx) ((data) &= ~(1 << (idx))) #define CHK_BIT(data, idx) ((data) & (1 << (idx))) #define SET_AUTO_DELINK(chip) ((chip)->flag |= FLIDX_AUTO_DELINK) #define CLR_AUTO_DELINK(chip) ((chip)->flag &= ~FLIDX_AUTO_DELINK) #define CHK_AUTO_DELINK(chip) ((chip)->flag & FLIDX_AUTO_DELINK) #define RTS51X_GET_VID(chip) ((chip)->vendor_id) #define RTS51X_GET_PID(chip) ((chip)->product_id) #define VENDOR_ID(chip) ((chip)->status[0].vid) #define PRODUCT_ID(chip) ((chip)->status[0].pid) #define FW_VERSION(chip) ((chip)->status[0].fw_ver) #define STATUS_LEN(chip) ((chip)->status_len) #define STATUS_SUCCESS 0 #define STATUS_FAIL 1 /* Check card reader function */ #define SUPPORT_DETAILED_TYPE1(chip) \ CHK_BIT((chip)->status[0].function[0], 1) #define SUPPORT_OT(chip) \ CHK_BIT((chip)->status[0].function[0], 2) #define SUPPORT_OC(chip) \ CHK_BIT((chip)->status[0].function[0], 3) #define SUPPORT_AUTO_DELINK(chip) \ CHK_BIT((chip)->status[0].function[0], 4) #define SUPPORT_SDIO(chip) \ CHK_BIT((chip)->status[0].function[1], 0) #define SUPPORT_DETAILED_TYPE2(chip) \ CHK_BIT((chip)->status[0].function[1], 1) #define CHECK_PID(chip, pid) (RTS51X_GET_PID(chip) == (pid)) #define CHECK_FW_VER(chip, fw_ver) (FW_VERSION(chip) == (fw_ver)) #define CHECK_ID(chip, pid, fw_ver) \ (CHECK_PID((chip), (pid)) && CHECK_FW_VER((chip), (fw_ver))) static int init_realtek_cr(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ {\ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) \ } static const struct usb_device_id realtek_cr_ids[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, realtek_cr_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev realtek_cr_unusual_dev_list[] = { # include "unusual_realtek.h" {} /* Terminating entry */ }; #undef UNUSUAL_DEV static int rts51x_bulk_transport(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *)us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *)us->iobuf; int result; unsigned int residue; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : US_BULK_FLAG_OUT; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* check bulk status */ if (bcs->Signature != cpu_to_le32(US_BULK_CS_SIGN)) { usb_stor_dbg(us, "Signature mismatch: got %08X, expecting %08X\n", le32_to_cpu(bcs->Signature), US_BULK_CS_SIGN); return USB_STOR_TRANSPORT_ERROR; } residue = le32_to_cpu(bcs->Residue); if (bcs->Tag != us->tag) return USB_STOR_TRANSPORT_ERROR; /* * try to compute the actual residue, based on how much data * was really transferred and what the device tells us */ if (residue > buf_len) residue = buf_len; if (act_len) *act_len = buf_len - residue; /* based on the status code, we report good or bad */ switch (bcs->Status) { case US_BULK_STAT_OK: /* command good -- note that data could be short */ return USB_STOR_TRANSPORT_GOOD; case US_BULK_STAT_FAIL: /* command failed */ return USB_STOR_TRANSPORT_FAILED; case US_BULK_STAT_PHASE: /* * phase error -- note that a transport reset will be * invoked by the invoke_transport() function */ return USB_STOR_TRANSPORT_ERROR; } /* we should never get here, but if we do, we're in trouble */ return USB_STOR_TRANSPORT_ERROR; } static int rts51x_bulk_transport_special(struct us_data *us, u8 lun, u8 *cmd, int cmd_len, u8 *buf, int buf_len, enum dma_data_direction dir, int *act_len) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf; int result; unsigned int cswlen; unsigned int cbwlen = US_BULK_CB_WRAP_LEN; /* set up the command wrapper */ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); bcb->DataTransferLength = cpu_to_le32(buf_len); bcb->Flags = (dir == DMA_FROM_DEVICE) ? US_BULK_FLAG_IN : US_BULK_FLAG_OUT; bcb->Tag = ++us->tag; bcb->Lun = lun; bcb->Length = cmd_len; /* copy the command payload */ memset(bcb->CDB, 0, sizeof(bcb->CDB)); memcpy(bcb->CDB, cmd, bcb->Length); /* send it to out endpoint */ result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcb, cbwlen, NULL); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; /* DATA STAGE */ /* send/receive data payload, if there is any */ if (buf && buf_len) { unsigned int pipe = (dir == DMA_FROM_DEVICE) ? us->recv_bulk_pipe : us->send_bulk_pipe; result = usb_stor_bulk_transfer_buf(us, pipe, buf, buf_len, NULL); if (result == USB_STOR_XFER_ERROR) return USB_STOR_TRANSPORT_ERROR; } /* get CSW for device status */ result = usb_bulk_msg(us->pusb_dev, us->recv_bulk_pipe, bcs, US_BULK_CS_WRAP_LEN, &cswlen, 250); return result; } /* Determine what the maximum LUN supported is */ static int rts51x_get_max_lun(struct us_data *us) { int result; /* issue the command */ us->iobuf[0] = 0; result = usb_stor_control_msg(us, us->recv_ctrl_pipe, US_BULK_GET_MAX_LUN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, us->ifnum, us->iobuf, 1, 10 * HZ); usb_stor_dbg(us, "GetMaxLUN command result is %d, data is %d\n", result, us->iobuf[0]); /* if we have a successful request, return the result */ if (result > 0) return us->iobuf[0]; return 0; } static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0D; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_FROM_DEVICE, NULL); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(data, buf, len); kfree(buf); return 0; } static int rts51x_write_mem(struct us_data *us, u16 addr, u8 *data, u16 len) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmemdup(data, len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = (u8) (addr >> 8); cmnd[3] = (u8) addr; cmnd[4] = (u8) (len >> 8); cmnd[5] = (u8) len; retval = rts51x_bulk_transport(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) return -EIO; return 0; } static int rts51x_read_status(struct us_data *us, u8 lun, u8 *status, int len, int *actlen) { int retval; u8 cmnd[12] = { 0 }; u8 *buf; buf = kmalloc(len, GFP_NOIO); if (buf == NULL) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "lun = %d\n", lun); cmnd[0] = 0xF0; cmnd[1] = 0x09; retval = rts51x_bulk_transport(us, lun, cmnd, 12, buf, len, DMA_FROM_DEVICE, actlen); if (retval != USB_STOR_TRANSPORT_GOOD) { kfree(buf); return -EIO; } memcpy(status, buf, len); kfree(buf); return 0; } static int rts51x_check_status(struct us_data *us, u8 lun) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[16]; retval = rts51x_read_status(us, lun, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) return -EIO; usb_stor_dbg(us, "chip->status_len = %d\n", chip->status_len); chip->status[lun].vid = ((u16) buf[0] << 8) | buf[1]; chip->status[lun].pid = ((u16) buf[2] << 8) | buf[3]; chip->status[lun].cur_lun = buf[4]; chip->status[lun].card_type = buf[5]; chip->status[lun].total_lun = buf[6]; chip->status[lun].fw_ver = ((u16) buf[7] << 8) | buf[8]; chip->status[lun].phy_exist = buf[9]; chip->status[lun].multi_flag = buf[10]; chip->status[lun].multi_card = buf[11]; chip->status[lun].log_exist = buf[12]; if (chip->status_len == 16) { chip->status[lun].detailed_type.detailed_type1 = buf[13]; chip->status[lun].function[0] = buf[14]; chip->status[lun].function[1] = buf[15]; } return 0; } static int enable_oscillator(struct us_data *us) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; value |= 0x04; retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (!(value & 0x04)) return -EIO; return 0; } static int __do_config_autodelink(struct us_data *us, u8 *data, u16 len) { int retval; u8 cmnd[12] = {0}; u8 *buf; usb_stor_dbg(us, "addr = 0xfe47, len = %d\n", len); buf = kmemdup(data, len, GFP_NOIO); if (!buf) return USB_STOR_TRANSPORT_ERROR; cmnd[0] = 0xF0; cmnd[1] = 0x0E; cmnd[2] = 0xfe; cmnd[3] = 0x47; cmnd[4] = (u8)(len >> 8); cmnd[5] = (u8)len; retval = rts51x_bulk_transport_special(us, 0, cmnd, 12, buf, len, DMA_TO_DEVICE, NULL); kfree(buf); if (retval != USB_STOR_TRANSPORT_GOOD) { return -EIO; } return 0; } static int do_config_autodelink(struct us_data *us, int enable, int force) { int retval; u8 value; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (enable) { if (force) value |= 0x03; else value |= 0x01; } else { value &= ~0x03; } usb_stor_dbg(us, "set 0xfe47 to 0x%x\n", value); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; return 0; } static int config_autodelink_after_power_on(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (auto_delink_en) { CLR_BIT(value, 0); CLR_BIT(value, 1); SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); SET_BIT(value, 7); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; retval = enable_oscillator(us); if (retval == 0) (void)do_config_autodelink(us, 1, 0); } else { /* Autodelink controlled by firmware */ SET_BIT(value, 2); if (CHECK_ID(chip, 0x0138, 0x3882)) CLR_BIT(value, 2); if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { CLR_BIT(value, 0); CLR_BIT(value, 7); } /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0xFF; retval = rts51x_write_mem(us, 0xFE79, &value, 1); if (retval < 0) return -EIO; value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } #ifdef CONFIG_PM static int config_autodelink_before_power_down(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 value; if (!CHK_AUTO_DELINK(chip)) return 0; if (auto_delink_en) { retval = rts51x_read_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; SET_BIT(value, 0); if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); retval = rts51x_write_mem(us, 0xFE77, &value, 1); if (retval < 0) return -EIO; } else { if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880) || CHECK_ID(chip, 0x0138, 0x3882)) { retval = rts51x_read_mem(us, 0xFE47, &value, 1); if (retval < 0) return -EIO; if (CHECK_ID(chip, 0x0159, 0x5889) || CHECK_ID(chip, 0x0138, 0x3880)) { SET_BIT(value, 0); SET_BIT(value, 7); } if (CHECK_ID(chip, 0x0138, 0x3882)) SET_BIT(value, 2); /* retval = rts51x_write_mem(us, 0xFE47, &value, 1); */ retval = __do_config_autodelink(us, &value, 1); if (retval < 0) return -EIO; } if (CHECK_ID(chip, 0x0159, 0x5888)) { value = 0x01; retval = rts51x_write_mem(us, 0x48, &value, 1); if (retval < 0) return -EIO; } } return 0; } static void fw5895_init(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 val; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, &val, 1); if (retval == STATUS_SUCCESS && (val & 0x1F) == 0) { val = 0x1F; retval = rts51x_write_mem(us, 0xFD70, &val, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail, OR (val & 0x1F) != 0\n"); } } } #endif #ifdef CONFIG_REALTEK_AUTOPM static void fw5895_set_mmc_wp(struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); int retval; u8 buf[13]; if ((PRODUCT_ID(chip) != 0x0158) || (FW_VERSION(chip) != 0x5895)) { usb_stor_dbg(us, "Not the specified device, return immediately!\n"); } else { retval = rts51x_read_mem(us, 0xFD6F, buf, 1); if (retval == STATUS_SUCCESS && (buf[0] & 0x24) == 0x24) { /* SD Exist and SD WP */ retval = rts51x_read_mem(us, 0xD04E, buf, 1); if (retval == STATUS_SUCCESS) { buf[0] |= 0x04; retval = rts51x_write_mem(us, 0xFD70, buf, 1); if (retval != STATUS_SUCCESS) usb_stor_dbg(us, "Write memory fail\n"); } else { usb_stor_dbg(us, "Read memory fail\n"); } } else { usb_stor_dbg(us, "Read memory fail, OR (buf[0]&0x24)!=0x24\n"); } } } static void rts51x_modi_suspend_timer(struct rts51x_chip *chip) { struct us_data *us = chip->us; usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); chip->timer_expires = jiffies + secs_to_jiffies(ss_delay); mod_timer(&chip->rts51x_suspend_timer, chip->timer_expires); } static void rts51x_suspend_timer_fn(struct timer_list *t) { struct rts51x_chip *chip = timer_container_of(chip, t, rts51x_suspend_timer); struct us_data *us = chip->us; switch (rts51x_get_stat(chip)) { case RTS51X_STAT_INIT: case RTS51X_STAT_RUN: rts51x_modi_suspend_timer(chip); break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; default: usb_stor_dbg(us, "Unknown state !!!\n"); break; } } static inline int working_scsi(struct scsi_cmnd *srb) { if ((srb->cmnd[0] == TEST_UNIT_READY) || (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL)) { return 0; } return 1; } static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) { struct rts51x_chip *chip = (struct rts51x_chip *)(us->extra); static int card_first_show = 1; static const u8 media_not_present[] = { 0x70, 0, 0x02, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x3A, 0, 0, 0, 0, 0 }; static const u8 invalid_cmd_field[] = { 0x70, 0, 0x05, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0x24, 0, 0, 0, 0, 0 }; int ret; if (working_scsi(srb)) { usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } if (rts51x_get_stat(chip) != RTS51X_STAT_RUN) rts51x_set_stat(chip, RTS51X_STAT_RUN); chip->proto_handler_backup(srb, us); } else { if (rts51x_get_stat(chip) == RTS51X_STAT_SS) { usb_stor_dbg(us, "NOT working scsi\n"); if ((srb->cmnd[0] == TEST_UNIT_READY) && (chip->pwr_state == US_SUSPEND)) { if (TST_LUN_READY(chip, srb->device->lun)) { srb->result = SAM_STAT_GOOD; } else { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, media_not_present, US_SENSE_SIZE); } usb_stor_dbg(us, "TEST_UNIT_READY\n"); goto out; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { int prevent = srb->cmnd[4] & 0x1; if (prevent) { srb->result = SAM_STAT_CHECK_CONDITION; memcpy(srb->sense_buffer, invalid_cmd_field, US_SENSE_SIZE); } else { srb->result = SAM_STAT_GOOD; } usb_stor_dbg(us, "ALLOW_MEDIUM_REMOVAL\n"); goto out; } } else { usb_stor_dbg(us, "NOT working scsi, not SS\n"); chip->proto_handler_backup(srb, us); /* Check whether card is plugged in */ if (srb->cmnd[0] == TEST_UNIT_READY) { if (srb->result == SAM_STAT_GOOD) { SET_LUN_READY(chip, srb->device->lun); if (card_first_show) { card_first_show = 0; fw5895_set_mmc_wp(us); } } else { CLR_LUN_READY(chip, srb->device->lun); card_first_show = 1; } } if (rts51x_get_stat(chip) != RTS51X_STAT_IDLE) rts51x_set_stat(chip, RTS51X_STAT_IDLE); } } out: usb_stor_dbg(us, "state:%d\n", rts51x_get_stat(chip)); if (rts51x_get_stat(chip) == RTS51X_STAT_RUN) rts51x_modi_suspend_timer(chip); } static int realtek_cr_autosuspend_setup(struct us_data *us) { struct rts51x_chip *chip; struct rts51x_status *status = NULL; u8 buf[16]; int retval; chip = (struct rts51x_chip *)us->extra; chip->support_auto_delink = 0; chip->pwr_state = US_RESUME; chip->lun_ready = 0; rts51x_set_stat(chip, RTS51X_STAT_INIT); retval = rts51x_read_status(us, 0, buf, 16, &(chip->status_len)); if (retval != STATUS_SUCCESS) { usb_stor_dbg(us, "Read status fail\n"); return -EIO; } status = chip->status; status->vid = ((u16) buf[0] << 8) | buf[1]; status->pid = ((u16) buf[2] << 8) | buf[3]; status->cur_lun = buf[4]; status->card_type = buf[5]; status->total_lun = buf[6]; status->fw_ver = ((u16) buf[7] << 8) | buf[8]; status->phy_exist = buf[9]; status->multi_flag = buf[10]; status->multi_card = buf[11]; status->log_exist = buf[12]; if (chip->status_len == 16) { status->detailed_type.detailed_type1 = buf[13]; status->function[0] = buf[14]; status->function[1] = buf[15]; } /* back up the proto_handler in us->extra */ chip = (struct rts51x_chip *)(us->extra); chip->proto_handler_backup = us->proto_handler; /* Set the autosuspend_delay to 0 */ pm_runtime_set_autosuspend_delay(&us->pusb_dev->dev, 0); /* override us->proto_handler setted in get_protocol() */ us->proto_handler = rts51x_invoke_transport; chip->timer_expires = 0; timer_setup(&chip->rts51x_suspend_timer, rts51x_suspend_timer_fn, 0); fw5895_init(us); /* enable autosuspend function of the usb device */ usb_enable_autosuspend(us->pusb_dev); return 0; } #endif static void realtek_cr_destructor(void *extra) { struct rts51x_chip *chip = extra; if (!chip) return; #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) { timer_delete(&chip->rts51x_suspend_timer); chip->timer_expires = 0; } #endif kfree(chip->status); } #ifdef CONFIG_PM static int realtek_cr_suspend(struct usb_interface *iface, pm_message_t message) { struct us_data *us = usb_get_intfdata(iface); /* wait until no command is running */ mutex_lock(&us->dev_mutex); config_autodelink_before_power_down(us); mutex_unlock(&us->dev_mutex); return 0; } static int realtek_cr_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); fw5895_init(us); config_autodelink_after_power_on(us); return 0; } #else #define realtek_cr_suspend NULL #define realtek_cr_resume NULL #endif static int init_realtek_cr(struct us_data *us) { struct rts51x_chip *chip; int size, i, retval; chip = kzalloc(sizeof(struct rts51x_chip), GFP_KERNEL); if (!chip) return -ENOMEM; us->extra = chip; us->extra_destructor = realtek_cr_destructor; us->max_lun = chip->max_lun = rts51x_get_max_lun(us); chip->us = us; usb_stor_dbg(us, "chip->max_lun = %d\n", chip->max_lun); size = (chip->max_lun + 1) * sizeof(struct rts51x_status); chip->status = kzalloc(size, GFP_KERNEL); if (!chip->status) goto INIT_FAIL; for (i = 0; i <= (int)(chip->max_lun); i++) { retval = rts51x_check_status(us, (u8) i); if (retval < 0) goto INIT_FAIL; } if (CHECK_PID(chip, 0x0138) || CHECK_PID(chip, 0x0158) || CHECK_PID(chip, 0x0159)) { if (CHECK_FW_VER(chip, 0x5888) || CHECK_FW_VER(chip, 0x5889) || CHECK_FW_VER(chip, 0x5901)) SET_AUTO_DELINK(chip); if (STATUS_LEN(chip) == 16) { if (SUPPORT_AUTO_DELINK(chip)) SET_AUTO_DELINK(chip); } } #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) realtek_cr_autosuspend_setup(us); #endif usb_stor_dbg(us, "chip->flag = 0x%x\n", chip->flag); (void)config_autodelink_after_power_on(us); return 0; INIT_FAIL: if (us->extra) { kfree(chip->status); kfree(us->extra); us->extra = NULL; } return -EIO; } static struct scsi_host_template realtek_cr_host_template; static int realtek_cr_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; dev_dbg(&intf->dev, "Probe Realtek Card Reader!\n"); result = usb_stor_probe1(&us, intf, id, (id - realtek_cr_ids) + realtek_cr_unusual_dev_list, &realtek_cr_host_template); if (result) return result; result = usb_stor_probe2(us); return result; } static struct usb_driver realtek_cr_driver = { .name = DRV_NAME, .probe = realtek_cr_probe, .disconnect = usb_stor_disconnect, /* .suspend = usb_stor_suspend, */ /* .resume = usb_stor_resume, */ .reset_resume = usb_stor_reset_resume, .suspend = realtek_cr_suspend, .resume = realtek_cr_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = realtek_cr_ids, .soft_unbind = 1, .supports_autosuspend = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(realtek_cr_driver, realtek_cr_host_template, DRV_NAME); |
| 6858 516 6 4 206 203 6 6 6 6 4 48 1 47 47 2 30 2 2 1202 14 1190 5 5 6738 6734 6704 6521 176 1028 186 2 139 58 1057 10 1051 1045 16 2 1059 1060 1059 1060 1058 1058 2 1028 33 32 1031 1172 1153 30 4 26 1176 1174 1174 19 102 1061 1171 1174 1171 1174 1168 1176 1175 1171 1176 44 1138 1167 1173 1168 1908 3 1903 1911 711 1275 652 33 1365 1911 1854 1848 800 766 1221 133 1124 2 1221 1191 70 1226 104 1174 83 1105 1 1174 1126 57 1172 1176 1176 1130 1175 1172 8309 7 8303 9 1 1 9 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #include <linux/dcache.h> #include <linux/fs.h> #include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/srcu.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" /* * Clear all of the marks on an inode when it is being evicted from core */ void __fsnotify_inode_delete(struct inode *inode) { fsnotify_clear_marks_by_inode(inode); } EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); void __fsnotify_vfsmount_delete(struct vfsmount *mnt) { fsnotify_clear_marks_by_mount(mnt); } void __fsnotify_mntns_delete(struct mnt_namespace *mntns) { fsnotify_clear_marks_by_mntns(mntns); } /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. * * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); continue; } /* * If i_count is zero, the inode cannot have any watches and * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. * However, we should have been called /after/ evict_inodes * removed all zero refcount inodes, in any case. Test to * be sure. */ if (!icount_read(inode)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); fsnotify_inode_delete(inode); iput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); } void fsnotify_sb_delete(struct super_block *sb) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); /* Were any marks ever added to any object on this sb? */ if (!sbinfo) return; fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), !atomic_long_read(fsnotify_sb_watched_objects(sb))); WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT)); } void fsnotify_sb_free(struct super_block *sb) { kfree(sb->s_fsnotify_info); } /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the * parent cares. Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ void fsnotify_set_children_dentry_flags(struct inode *inode) { struct dentry *alias; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); hlist_for_each_entry(child, &alias->d_children, d_sib) { if (!child->d_inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); } /* * Lazily clear false positive PARENT_WATCHED flag for child whose parent had * stopped watching children. */ static void fsnotify_clear_child_dentry_flag(struct inode *pinode, struct dentry *dentry) { spin_lock(&dentry->d_lock); /* * d_lock is a sufficient barrier to prevent observing a non-watched * parent state from before the fsnotify_set_children_dentry_flags() * or fsnotify_update_flags() call that had set PARENT_WATCHED. */ if (!fsnotify_inode_watches_children(pinode)) dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&dentry->d_lock); } /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = 0; /* We only send parent/name to inode/sb/mount for events on non-dir */ if (mask & FS_ISDIR) return false; /* * All events that are possible on child can also may be reported with * parent/name info to inode/sb/mount. Otherwise, a watching parent * could result in events reported with unexpected name info to sb/mount. */ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask( READ_ONCE(inode->i_fsnotify_mask)); marks_mask |= fsnotify_parent_needed_mask( READ_ONCE(inode->i_sb->s_fsnotify_mask)); marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } /* Are there any inode/mount/sb objects that watch for these events? */ static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | READ_ONCE(inode->i_sb->s_fsnotify_mask); return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } /* Report pre-content event with optional range info */ int fsnotify_pre_content(const struct path *path, const loff_t *ppos, size_t count) { struct file_range range; /* Report page aligned range only when pos is known */ if (!ppos) return fsnotify_path(path, FS_PRE_ACCESS); range.path = path; range.pos = PAGE_ALIGN_DOWN(*ppos); range.count = PAGE_ALIGN(*ppos + count) - range.pos; return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range, FSNOTIFY_EVENT_FILE_RANGE); } /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with * parent and name info. * * Notify only the child without name info if parent is not watching and * inode/sb/mount are not interested in events with parent and name info. */ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); __u32 mnt_mask = path ? READ_ONCE(real_mount(path->mnt)->mnt_fsnotify_mask) : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; struct qstr *file_name = NULL; int ret = 0; /* Optimize the likely case of nobody watching this path */ if (likely(!parent_watched && !fsnotify_object_watched(inode, mnt_mask, mask))) return 0; parent = NULL; parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask); if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ parent = dget_parent(dentry); p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) fsnotify_clear_child_dentry_flag(p_inode, dentry); /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. */ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; if (parent_interested) mask |= FS_EVENT_ON_CHILD; } notify: ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0); if (file_name) release_dentry_name_snapshot(&name); dput(parent); return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); static int fsnotify_handle_inode_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct inode *inode = fsnotify_data_inode(data, data_type); const struct fsnotify_ops *ops = group->ops; if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; if (WARN_ON_ONCE(!inode && !dir)) return 0; if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; /* Check interest of this mark in case event was sent with two marks */ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) return 0; return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); } static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; /* * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. * The only ->handle_inode_event() backend that supports FS_RENAME is * dnotify, where it means file was renamed within same parent. */ if (mask & FS_RENAME) { struct dentry *moved = fsnotify_data_dentry(data, data_type); if (dir != moved->d_parent->d_inode) return 0; } if (parent_mark) { ret = fsnotify_handle_inode_event(group, parent_mark, mask, data, data_type, dir, name, 0); if (ret) return ret; } if (!inode_mark) return 0; /* * Some events can be sent on both parent dir and child marks (e.g. * FS_ATTRIB). If both parent dir and child are watching, report the * event once to parent dir with name (if interested) and once to child * without name (if interested). * * In any case regardless whether the parent is watching or not, the * child watcher is expecting an event without the FS_EVENT_ON_CHILD * flag. The file name is expected if and only if this is a directory * event. */ mask &= ~FS_EVENT_ON_CHILD; if (!(mask & ALL_FSNOTIFY_DIRENT_EVENTS)) { dir = NULL; name = NULL; } return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignore_mask = 0; bool is_dir = mask & FS_ISDIR; struct fsnotify_mark *mark; int type; if (!iter_info->report_mask) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignore_mask = 0; } } /* Are any of the group marks interested in this event? */ fsnotify_foreach_iter_mark_type(iter_info, mark, type) { group = mark->group; marks_mask |= mark->mask; marks_ignore_mask |= fsnotify_effective_ignore_mask(mark, is_dir, type); } pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", __func__, group, mask, marks_mask, marks_ignore_mask, data, data_type, dir, cookie); if (!(test_mask & marks_mask & ~marks_ignore_mask)) return 0; if (group->ops->handle_event) { return group->ops->handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } return fsnotify_handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; conn = srcu_dereference(*connp, &fsnotify_mark_srcu); if (conn) node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; if (mark) node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } /* * iter_info is a multi head priority queue of marks. * Pick a subset of marks from queue heads, all with the same group * and set the report_mask to a subset of the selected marks. * Returns false if there are no more groups to iterate. */ static bool fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; struct fsnotify_mark *mark; int type; /* Choose max prio group among groups of all queue heads */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) max_prio_group = mark->group; } if (!max_prio_group) return false; /* Set the report mask for marks from same group as max prio group */ iter_info->current_group = max_prio_group; iter_info->report_mask = 0; fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) { /* * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode * is watching children and interested in this event, * which is an event possible on child. * But is *this mark* watching children? */ if (type == FSNOTIFY_ITER_TYPE_PARENT && !(mark->mask & FS_EVENT_ON_CHILD) && !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) continue; fsnotify_iter_set_report_type(iter_info, type); } } return true; } /* * Pop from iter_info multi head queue, the marks that belong to the group of * current iteration step. */ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *mark; int type; /* * We cannot use fsnotify_foreach_iter_mark_type() here because we * may need to advance a mark of type X that belongs to current_group * but was not selected for reporting. */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } } /* * fsnotify - This is the main call to fsnotify. * * The VFS calls into hook specific functions in linux/fsnotify.h. * Those functions then in turn call here. Here will call out to all of the * registered fsnotify_group. Those groups can then use the notification event * in whatever means they feel necessary. * * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - * If @dir and @inode are both non-NULL, event may be * reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; __u32 test_mask, marks_mask = 0; if (path) mnt = real_mount(path->mnt); if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ if (mask & FS_RENAME) { moved = fsnotify_data_dentry(data, data_type); inode2 = moved->d_parent->d_inode; inode2_type = FSNOTIFY_ITER_TYPE_INODE2; } } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. */ inode2 = dir; inode2_type = FSNOTIFY_ITER_TYPE_PARENT; } /* * Optimization: srcu_read_lock() has a memory barrier which can * be expensive. It protects walking the *_fsnotify_marks lists. * However, if we do not walk the lists, we do not have to do * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!inode2 || !inode2->i_fsnotify_marks) && (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) return 0; if (sb) marks_mask |= READ_ONCE(sb->s_fsnotify_mask); if (mnt) marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); if (mnt_data) marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); /* * If this is a modify event we may need to clear some ignore masks. * In that case, the object with ignore masks will have the FS_MODIFY * event in its mask. * Otherwise, return if none of the marks care about this type of event. */ test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (!(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if (sbinfo) { iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = fsnotify_first_mark(&sbinfo->sb_marks); } if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } if (inode2) { iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } if (mnt_data) { iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { ret = send_to_group(mask, data, data_type, dir, file_name, cookie, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; fsnotify_iter_next(&iter_info); } ret = 0; out: srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm * hook and set the FMODE_NONOTIFY_ mode bits accordignly. * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ int fsnotify_open_perm_and_set_mode(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; __u32 mnt_mask, p_mask = 0; /* Is it a file opened by fanotify? */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; /* * Permission events is a super set of pre-content events, so if there * are no permission event watchers, there are also no pre-content event * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit. */ if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return 0; } /* * OK, there are some permission event watchers. Check if anybody is * watching for permission events on *this* file. */ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask, ALL_FSNOTIFY_PERM_EVENTS); if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { parent = dget_parent(dentry); p_mask |= fsnotify_inode_watches_children(d_inode(parent)); dput(parent); } /* * Legacy FAN_ACCESS_PERM events have very high performance overhead, * so unlikely to be used in the wild. If they are used there will be * no optimizations at all. */ if (unlikely(p_mask & FS_ACCESS_PERM)) { /* Enable all permission and pre-content events */ file_set_fsnotify_mode(file, 0); goto open_perm; } /* * Pre-content events are only supported on regular files. * If there are pre-content event watchers and no permission access * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. * That is the common case with HSM service. */ if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) { file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); goto open_perm; } /* Nobody watching permission and pre-content events on this file */ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); open_perm: /* * Send open perm events depending on object masks and regardless of * FMODE_NONOTIFY_PERM. */ if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) { int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } if (p_mask & FS_OPEN_PERM) return fsnotify_path(&file->f_path, FS_OPEN_PERM); return 0; } #endif void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) { struct fsnotify_mnt data = { .ns = ns, .mnt_id = real_mount(mnt)->mnt_id_unique, }; if (WARN_ON_ONCE(!ns)) return; /* * This is an optimization as well as making sure fsnotify_init() has * been called. */ if (!ns->n_fsnotify_marks) return; fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); } static __init int fsnotify_init(void) { int ret; BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) panic("initializing fsnotify_mark_srcu"); fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, SLAB_PANIC); return 0; } core_initcall(fsnotify_init); |
| 187 187 191 191 186 1 30 1 18 37 38 31 28 6 1 66 1 31 34 52 32 35 7 5 2 5 2 6 1 7 7 5 2 4 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/options.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Option parsing */ #include <linux/string.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/nls.h> #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/slab.h> #include "hfsplus_fs.h" enum { opt_creator, opt_type, opt_umask, opt_uid, opt_gid, opt_part, opt_session, opt_nls, opt_decompose, opt_barrier, opt_force, }; static const struct fs_parameter_spec hfs_param_spec[] = { fsparam_string ("creator", opt_creator), fsparam_string ("type", opt_type), fsparam_u32oct ("umask", opt_umask), fsparam_u32 ("uid", opt_uid), fsparam_u32 ("gid", opt_gid), fsparam_u32 ("part", opt_part), fsparam_u32 ("session", opt_session), fsparam_string ("nls", opt_nls), fsparam_flag_no ("decompose", opt_decompose), fsparam_flag_no ("barrier", opt_barrier), fsparam_flag ("force", opt_force), {} }; /* Initialize an options object to reasonable defaults */ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) { if (!opts) return; opts->creator = HFSPLUS_DEF_CR_TYPE; opts->type = HFSPLUS_DEF_CR_TYPE; opts->umask = current_umask(); opts->uid = current_uid(); opts->gid = current_gid(); opts->part = -1; opts->session = -1; } /* Parse options from mount. Returns nonzero errno on failure */ int hfsplus_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct hfsplus_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; int opt; /* * Only the force option is examined during remount, all others * are ignored. */ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && strncmp(param->key, "force", 5)) return 0; opt = fs_parse(fc, hfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case opt_creator: if (strlen(param->string) != 4) { pr_err("creator requires a 4 character value\n"); return -EINVAL; } memcpy(&sbi->creator, param->string, 4); break; case opt_type: if (strlen(param->string) != 4) { pr_err("type requires a 4 character value\n"); return -EINVAL; } memcpy(&sbi->type, param->string, 4); break; case opt_umask: sbi->umask = (umode_t)result.uint_32; break; case opt_uid: sbi->uid = result.uid; set_bit(HFSPLUS_SB_UID, &sbi->flags); break; case opt_gid: sbi->gid = result.gid; set_bit(HFSPLUS_SB_GID, &sbi->flags); break; case opt_part: sbi->part = result.uint_32; break; case opt_session: sbi->session = result.uint_32; break; case opt_nls: if (sbi->nls) { pr_err("unable to change nls mapping\n"); return -EINVAL; } sbi->nls = load_nls(param->string); if (!sbi->nls) { pr_err("unable to load nls mapping \"%s\"\n", param->string); return -EINVAL; } break; case opt_decompose: if (result.negated) set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); else clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); break; case opt_barrier: if (result.negated) set_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags); else clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags); break; case opt_force: set_bit(HFSPLUS_SB_FORCE, &sbi->flags); break; default: return -EINVAL; } return 0; } int hfsplus_show_options(struct seq_file *seq, struct dentry *root) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb); if (sbi->creator != HFSPLUS_DEF_CR_TYPE) seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4); if (sbi->type != HFSPLUS_DEF_CR_TYPE) seq_show_option_n(seq, "type", (char *)&sbi->type, 4); seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, from_kuid_munged(&init_user_ns, sbi->uid), from_kgid_munged(&init_user_ns, sbi->gid)); if (sbi->part >= 0) seq_printf(seq, ",part=%u", sbi->part); if (sbi->session >= 0) seq_printf(seq, ",session=%u", sbi->session); if (sbi->nls) seq_printf(seq, ",nls=%s", sbi->nls->charset); if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) seq_puts(seq, ",nodecompose"); if (test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) seq_puts(seq, ",nobarrier"); return 0; } |
| 16 1 1 3 1 6 4 1 3 2 1 8 1 2 6 4 2 5 1 5 6 6 1 5 5 5 5 3 2 4 1 5 3 2 1 50 50 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 | // SPDX-License-Identifier: GPL-2.0+ /* net/sched/act_ctinfo.c netfilter ctinfo connmark actions * * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ctinfo.h> #include <net/tc_act/tc_ctinfo.h> #include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_ctinfo_ops; static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, struct sk_buff *skb, int wlen, int proto) { u8 dscp, newdscp; newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { case NFPROTO_IPV4: dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK; if (dscp != newdscp) { if (likely(!skb_try_make_writable(skb, wlen))) { ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, newdscp); atomic64_inc(&ca->stats_dscp_set); } else { atomic64_inc(&ca->stats_dscp_error); } } break; case NFPROTO_IPV6: dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK; if (dscp != newdscp) { if (likely(!skb_try_make_writable(skb, wlen))) { ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, newdscp); atomic64_inc(&ca->stats_dscp_set); } else { atomic64_inc(&ca->stats_dscp_error); } } break; default: break; } } static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, struct sk_buff *skb) { atomic64_inc(&ca->stats_cpmark_set); skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash = NULL; struct tcf_ctinfo *ca = to_ctinfo(a); struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; enum ip_conntrack_info ctinfo; struct tcf_ctinfo_params *cp; struct nf_conn *ct; int proto, wlen; cp = rcu_dereference_bh(ca->params); tcf_lastuse_update(&ca->tcf_tm); tcf_action_update_bstats(&ca->common, skb); wlen = skb_network_offset(skb); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; break; case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; break; default: goto out; } ct = nf_ct_get(skb, &ctinfo); if (!ct) { /* look harder, usually ingress */ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, cp->net, &tuple)) goto out; zone.id = cp->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; thash = nf_conntrack_find_get(cp->net, &zone, &tuple); if (!thash) goto out; ct = nf_ct_tuplehash_to_ctrack(thash); } if (cp->mode & CTINFO_MODE_DSCP) if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) tcf_ctinfo_cpmark_set(ct, ca, cp, skb); if (thash) nf_ct_put(ct); out: return cp->action; } static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { [TCA_CTINFO_ACT] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ctinfo)), [TCA_CTINFO_ZONE] = { .type = NLA_U16 }, [TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 }, [TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 }, [TCA_CTINFO_PARMS_CPMARK_MASK] = { .type = NLA_U32 }, }; static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; struct tcf_chain *goto_ch = NULL; struct tc_ctinfo *actparm; struct tcf_ctinfo *ci; u8 dscpmaskshift; int ret = 0, err; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "ctinfo requires attributes to be passed"); return -EINVAL; } err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, extack); if (err < 0) return err; if (!tb[TCA_CTINFO_ACT]) { NL_SET_ERR_MSG_MOD(extack, "Missing required TCA_CTINFO_ACT attribute"); return -EINVAL; } actparm = nla_data(tb[TCA_CTINFO_ACT]); /* do some basic validation here before dynamically allocating things */ /* that we would otherwise have to clean up. */ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) { dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]); /* need contiguous 6 bit mask */ dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0; if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CTINFO_PARMS_DSCP_MASK], "dscp mask must be 6 contiguous bits"); return -EINVAL; } dscpstatemask = nla_get_u32_default(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK], 0); /* mask & statemask must not overlap */ if (dscpmask & dscpstatemask) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CTINFO_PARMS_DSCP_STATEMASK], "dscp statemask must not overlap dscp mask"); return -EINVAL; } } /* done the validation:now to the actual action allocation */ index = actparm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_ctinfo_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* don't override defaults */ return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } } else { return err; } err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; ci = to_ctinfo(*a); cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL); if (unlikely(!cp_new)) { err = -ENOMEM; goto put_chain; } cp_new->net = net; cp_new->zone = nla_get_u16_default(tb[TCA_CTINFO_ZONE], 0); if (dscpmask) { cp_new->dscpmask = dscpmask; cp_new->dscpmaskshift = dscpmaskshift; cp_new->dscpstatemask = dscpstatemask; cp_new->mode |= CTINFO_MODE_DSCP; } if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) { cp_new->cpmarkmask = nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]); cp_new->mode |= CTINFO_MODE_CPMARK; } cp_new->action = actparm->action; spin_lock_bh(&ci->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch); cp_new = rcu_replace_pointer(ci->params, cp_new, lockdep_is_held(&ci->tcf_lock)); spin_unlock_bh(&ci->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (cp_new) kfree_rcu(cp_new, rcu); return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { const struct tcf_ctinfo *ci = to_ctinfo(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_ctinfo_params *cp; struct tc_ctinfo opt = { .index = ci->tcf_index, .refcnt = refcount_read(&ci->tcf_refcnt) - ref, .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, }; struct tcf_t t; rcu_read_lock(); cp = rcu_dereference(ci->params); tcf_tm_dump(&t, &ci->tcf_tm); if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD)) goto nla_put_failure; opt.action = cp->action; if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt)) goto nla_put_failure; if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone)) goto nla_put_failure; if (cp->mode & CTINFO_MODE_DSCP) { if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK, cp->dscpmask)) goto nla_put_failure; if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK, cp->dscpstatemask)) goto nla_put_failure; } if (cp->mode & CTINFO_MODE_CPMARK) { if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK, cp->cpmarkmask)) goto nla_put_failure; } if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET, atomic64_read(&ci->stats_dscp_set), TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR, atomic64_read(&ci->stats_dscp_error), TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET, atomic64_read(&ci->stats_cpmark_set), TCA_CTINFO_PAD)) goto nla_put_failure; rcu_read_unlock(); return skb->len; nla_put_failure: rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } static void tcf_ctinfo_cleanup(struct tc_action *a) { struct tcf_ctinfo *ci = to_ctinfo(a); struct tcf_ctinfo_params *cp; cp = rcu_dereference_protected(ci->params, 1); if (cp) kfree_rcu(cp, rcu); } static struct tc_action_ops act_ctinfo_ops = { .kind = "ctinfo", .id = TCA_ID_CTINFO, .owner = THIS_MODULE, .act = tcf_ctinfo_act, .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, .cleanup= tcf_ctinfo_cleanup, .size = sizeof(struct tcf_ctinfo), }; MODULE_ALIAS_NET_ACT("ctinfo"); static __net_init int ctinfo_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_ctinfo_ops.net_id); } static struct pernet_operations ctinfo_net_ops = { .init = ctinfo_init_net, .exit_batch = ctinfo_exit_net, .id = &act_ctinfo_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init ctinfo_init_module(void) { return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops); } static void __exit ctinfo_cleanup_module(void) { tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops); } module_init(ctinfo_init_module); module_exit(ctinfo_cleanup_module); MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>"); MODULE_DESCRIPTION("Connection tracking mark actions"); MODULE_LICENSE("GPL"); |
| 73 59 18 26 1 47 3 3 10 35 32 13 53 3 59 6 38 31 49 18 1 1 1 1 1 5 9 1 6 2 6 6 1 1 1 2 3 3 1 12 2 3 9 15 15 4 15 3 3 3 2 2 2 1 3 3 1 1 2 2 2 1 1 61 61 3 68 68 3 3 10 1 1 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 | // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2020, Red Hat, Inc. */ #define pr_fmt(fmt) "MPTCP: " fmt #include "protocol.h" #include "mptcp_pm_gen.h" #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (family == AF_INET6) return MPTCP_PM_ADDR_ATTR_ADDR6; #endif return MPTCP_PM_ADDR_ATTR_ADDR4; } static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], const struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr, bool require_family) { int err, addr_addr; if (!attr) { GENL_SET_ERR_MSG(info, "missing address info"); return -EINVAL; } /* no validation needed - was already done via nested policy */ err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, mptcp_pm_address_nl_policy, info->extack); if (err) return err; if (tb[MPTCP_PM_ADDR_ATTR_ID]) addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { if (!require_family) return 0; NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing family"); return -EINVAL; } addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); if (addr->family != AF_INET #if IS_ENABLED(CONFIG_MPTCP_IPV6) && addr->family != AF_INET6 #endif ) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "unknown address family"); return -EINVAL; } addr_addr = mptcp_pm_family_to_addr(addr->family); if (!tb[addr_addr]) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing address data"); return -EINVAL; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr->family == AF_INET6) addr->addr6 = nla_get_in6_addr(tb[addr_addr]); else #endif addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); return 0; } int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; memset(addr, 0, sizeof(*addr)); return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); } int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; int err; memset(entry, 0, sizeof(*entry)); err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); if (err) return err; if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { s32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); entry->ifindex = val; } if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); return 0; } static int mptcp_nl_fill_addr(struct sk_buff *skb, struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; attr = nla_nest_start(skb, MPTCP_PM_ATTR_ADDR); if (!attr) return -EMSGSIZE; if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family)) goto nla_put_failure; if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) goto nla_put_failure; if (entry->ifindex && nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; if (addr->family == AF_INET && nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, addr->addr.s_addr)) goto nla_put_failure; #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6 && nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) goto nla_put_failure; #endif nla_nest_end(skb, attr); return 0; nla_put_failure: nla_nest_cancel(skb, attr); return -EMSGSIZE; } static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info) { if (info->attrs[MPTCP_PM_ATTR_TOKEN]) return mptcp_userspace_pm_get_addr(id, addr, info); return mptcp_pm_nl_get_addr(id, addr, info); } int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry addr; struct nlattr *attr; struct sk_buff *msg; void *reply; int ret; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) return -EINVAL; attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, info->genlhdr->cmd); if (!reply) { GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); ret = -EMSGSIZE; goto fail; } ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); goto fail; } ret = mptcp_nl_fill_addr(msg, &addr); if (ret) goto fail; genlmsg_end(msg, reply); ret = genlmsg_reply(msg, info); return ret; fail: nlmsg_free(msg); return ret; } int mptcp_pm_genl_fill_addr(struct sk_buff *msg, struct netlink_callback *cb, struct mptcp_pm_addr_entry *entry) { void *hdr; hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &mptcp_genl_family, NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); if (!hdr) return -EINVAL; if (mptcp_nl_fill_addr(msg, entry) < 0) { genlmsg_cancel(msg, hdr); return -EINVAL; } genlmsg_end(msg, hdr); return 0; } static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); if (info->attrs[MPTCP_PM_ATTR_TOKEN]) return mptcp_userspace_pm_dump_addr(msg, cb); return mptcp_pm_nl_dump_addr(msg, cb); } int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { return mptcp_pm_dump_addr(msg, cb); } static int mptcp_pm_set_flags(struct genl_info *info) { struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr_loc; int ret = -EINVAL; if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) return ret; attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); if (ret < 0) return ret; if (info->attrs[MPTCP_PM_ATTR_TOKEN]) return mptcp_userspace_pm_set_flags(&loc, info); return mptcp_pm_nl_set_flags(&loc, info); } int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { return mptcp_pm_set_flags(info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) { genlmsg_multicast_netns(&mptcp_genl_family, net, nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); } bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) { return genl_has_listeners(&mptcp_genl_family, sock_net((const struct sock *)msk), MPTCP_PM_EV_GRP_OFFSET); } static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) { const struct inet_sock *issk = inet_sk(ssk); const struct mptcp_subflow_context *sf; if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) return -EMSGSIZE; switch (ssk->sk_family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) return -EMSGSIZE; if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) return -EMSGSIZE; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &issk->pinet6->saddr)) return -EMSGSIZE; if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) return -EMSGSIZE; break; } #endif default: WARN_ON_ONCE(1); return -EMSGSIZE; } if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) return -EMSGSIZE; if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (WARN_ON_ONCE(!sf)) return -EINVAL; if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) return -EMSGSIZE; return 0; } static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { const struct sock *sk = (const struct sock *)msk; const struct mptcp_subflow_context *sf; u8 sk_err; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) return -EMSGSIZE; if (mptcp_event_add_subflow(skb, ssk)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (WARN_ON_ONCE(!sf)) return -EINVAL; if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) return -EMSGSIZE; if (ssk->sk_bound_dev_if && nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; return 0; } static int mptcp_event_sub_established(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { return mptcp_event_put_token_and_ssk(skb, msk, ssk); } static int mptcp_event_sub_closed(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { const struct mptcp_subflow_context *sf; if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) return -EMSGSIZE; sf = mptcp_subflow_ctx(ssk); if (!sf->reset_seen) return 0; if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason)) return -EMSGSIZE; if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient)) return -EMSGSIZE; return 0; } static int mptcp_event_created(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)); u16 flags = 0; if (err) return err; if (READ_ONCE(msk->pm.server_side)) { flags |= MPTCP_PM_EV_FLAG_SERVER_SIDE; /* Deprecated, and only set when it is the server side */ if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, 1)) return -EMSGSIZE; } if (READ_ONCE(msk->pm.remote_deny_join_id0)) flags |= MPTCP_PM_EV_FLAG_DENY_JOIN_ID0; if (flags && nla_put_u16(skb, MPTCP_ATTR_FLAGS, flags)) return -EMSGSIZE; return mptcp_event_add_subflow(skb, ssk); } void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) { struct net *net = sock_net((const struct sock *)msk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); if (!nlh) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) goto nla_put_failure; genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_ANNOUNCED); if (!nlh) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) goto nla_put_failure; if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port == 0 ? inet_sk(ssk)->inet_dport : info->port)) goto nla_put_failure; switch (info->family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) goto nla_put_failure; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) goto nla_put_failure; break; #endif default: WARN_ON_ONCE(1); goto nla_put_failure; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event) { const struct inet_sock *issk = inet_sk(ssk); struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event); if (!nlh) goto nla_put_failure; if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) goto nla_put_failure; if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) goto nla_put_failure; switch (ssk->sk_family) { case AF_INET: if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) goto nla_put_failure; break; #if IS_ENABLED(CONFIG_MPTCP_IPV6) case AF_INET6: { if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &issk->pinet6->saddr)) goto nla_put_failure; break; } #endif default: WARN_ON_ONCE(1); goto nla_put_failure; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, GFP_KERNEL); return; nla_put_failure: nlmsg_free(skb); } void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { struct net *net = sock_net((const struct sock *)msk); struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); if (!nlh) goto nla_put_failure; switch (type) { case MPTCP_EVENT_UNSPEC: WARN_ON_ONCE(1); break; case MPTCP_EVENT_CREATED: case MPTCP_EVENT_ESTABLISHED: if (mptcp_event_created(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_CLOSED: if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0) goto nla_put_failure; break; case MPTCP_EVENT_ANNOUNCED: case MPTCP_EVENT_REMOVED: /* call mptcp_event_addr_announced()/removed instead */ WARN_ON_ONCE(1); break; case MPTCP_EVENT_SUB_ESTABLISHED: case MPTCP_EVENT_SUB_PRIORITY: if (mptcp_event_sub_established(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_SUB_CLOSED: if (mptcp_event_sub_closed(skb, msk, ssk) < 0) goto nla_put_failure; break; case MPTCP_EVENT_LISTENER_CREATED: case MPTCP_EVENT_LISTENER_CLOSED: break; } genlmsg_end(skb, nlh); mptcp_nl_mcast_send(net, skb, gfp); return; nla_put_failure: nlmsg_free(skb); } struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, .netnsok = true, .module = THIS_MODULE, .ops = mptcp_pm_nl_ops, .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops), .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; void __init mptcp_pm_nl_init(void) { if (genl_register_family(&mptcp_genl_family)) panic("Failed to register MPTCP PM netlink family\n"); } |
| 5 7 7 5 5 5 5 5 5 5 5 5 5 5 61 61 18 30 14 2 14 2 2 2 5 5 5 5 5 4 1 5 5 5 5 5 5 5 87 87 56 2 49 5 2 6 2 36 36 2 2 2 310 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 | // SPDX-License-Identifier: GPL-2.0-only /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> */ #include <linux/module.h> #include <linux/gfp.h> #include <net/tcp.h> #include <net/rstreason.h> static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); u32 elapsed, user_timeout; s32 remaining; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout) return icsk->icsk_rto; elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp; if (tp->tcp_usec_ts) elapsed /= USEC_PER_MSEC; remaining = user_timeout - elapsed; if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 remaining, user_timeout; s32 elapsed; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout || !icsk->icsk_probes_tstamp) return when; elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp; if (unlikely(elapsed < 0)) elapsed = 0; remaining = msecs_to_jiffies(user_timeout) - elapsed; remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); return min_t(u32, remaining, when); } /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. * * Returns: Nothing (void) */ static void tcp_write_err(struct sock *sk) { tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /** * tcp_out_of_resources() - Close socket if out of resources * @sk: pointer to current socket * @do_reset: send a last packet with reset flag * * Do not allow orphaned sockets to eat all our resources. * This is direct violation of TCP specs, but it is required * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * * Also close if our net namespace is exiting; in that case there is no * hope of ever communicating again since all netns interfaces are already * down (or about to be down), and we need to release our dst references, * which have been moved to the netns loopback interface, so the namespace * can finish exiting. This condition is only possible if we are a kernel * socket, as those do not hold references to the namespace. * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { struct tcp_sock *tp = tcp_sk(sk); int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. */ (!tp->snd_wnd && !tp->packets_out)) do_reset = true; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_ABORT_ON_MEMORY); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } if (!check_net(sock_net(sk))) { /* Not possible to send reset; just close */ tcp_done(sk); return 1; } return 0; } /** * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket * @sk: Pointer to the current socket. * @alive: bool, socket alive state */ static int tcp_orphan_retries(struct sock *sk, bool alive) { int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe * number of retries. 8 corresponds to >100 seconds with minimal * RTO of 200msec. */ if (retries == 0 && alive) retries = 8; return retries; } static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { const struct net *net = sock_net(sk); int mss; /* Black hole detection */ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } static unsigned int tcp_model_timeout(struct sock *sk, unsigned int boundary, unsigned int rto_base) { unsigned int linear_backoff_thresh, timeout; linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base); if (boundary <= linear_backoff_thresh) timeout = ((2 << boundary) - 1) * rto_base; else timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * tcp_rto_max(sk); return jiffies_to_msecs(timeout); } /** * retransmits_timed_out() - returns true if this connection has timed out * @sk: The current socket * @boundary: max number of retransmissions * @timeout: A custom timeout value. * If set to 0 the default timeout is calculated and used. * Using TCP_RTO_MIN and the number of unsuccessful retransmits. * * The default "timeout" value this function can calculate and use * is equivalent to the timeout of a TCP Connection * after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { struct tcp_sock *tp = tcp_sk(sk); unsigned int start_ts, delta; if (!inet_csk(sk)->icsk_retransmits) return false; start_ts = tp->retrans_stamp; if (likely(timeout == 0)) { unsigned int rto_base = TCP_RTO_MIN; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) rto_base = tcp_timeout_init(sk); timeout = tcp_model_timeout(sk, boundary, rto_base); } if (tp->tcp_usec_ts) { /* delta maybe off up to a jiffy due to timer granularity. */ delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1); return (s32)(delta - timeout * USEC_PER_MSEC) >= 0; } return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0; } /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool expired = false, do_reset; int retry_until, max_retransmits; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */ retry_until = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); max_retransmits = retry_until; if (sk->sk_state == TCP_SYN_SENT) max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts); expired = icsk->icsk_retransmits >= max_retransmits; } else { if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < tcp_rto_max(sk); retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } if (!expired) expired = retransmits_timed_out(sk, retry_until, READ_ONCE(icsk->icsk_user_timeout)); tcp_fastopen_active_detect_blackhole(sk, expired); mptcp_active_detect_blackhole(sk, expired); if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG)) tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB, icsk->icsk_retransmits, icsk->icsk_rto, (int)expired); if (expired) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; } if (sk_rethink_txhash(sk)) { tp->timeout_rehash++; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); } return 0; } /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; /* Handling the sack compression case */ if (tp->compressed_ack) { tcp_mstamp_refresh(tp); tcp_sack_compress_send_ack(sk); return; } if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) return; if (time_after(icsk_delack_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk_delack_timeout(icsk)); return; } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_mstamp_refresh(tp); tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } } /** * tcp_delack_timer() - The TCP delayed ACK timeout handler * @t: Pointer to the timer. (gets casted to struct sock *) * * This function gets (indirectly) called when the kernel timer for a TCP packet * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work. * * Returns: Nothing (void) */ static void tcp_delack_timer(struct timer_list *t) { struct inet_connection_sock *icsk = timer_container_of(icsk, t, icsk_delack_timer); struct sock *sk = &icsk->icsk_inet.sk; /* Avoid taking socket spinlock if there is no ACK to send. * The compressed_ack check is racy, but a separate hrtimer * will take care of it eventually. */ if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && !READ_ONCE(tcp_sk(sk)->compressed_ack)) goto out; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); } else { __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); out: sock_put(sk); } static void tcp_probe_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; if (tp->packets_out || !skb) { WRITE_ONCE(icsk->icsk_probes_out, 0); icsk->icsk_probes_tstamp = 0; return; } /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as * long as the receiver continues to respond probes. We support this by * default and reset icsk_probes_out with incoming ACKs. But if the * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we * kill the socket when the retry count and the time exceeds the * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ if (!icsk->icsk_probes_tstamp) { icsk->icsk_probes_tstamp = tcp_jiffies32; } else { u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (user_timeout && (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= msecs_to_jiffies(user_timeout)) goto abort; } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { unsigned int rto_max = tcp_rto_max(sk); const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) goto abort; if (tcp_out_of_resources(sk, true)) return; } if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ tcp_send_probe0(sk); } } static void tcp_update_rto_stats(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if (!icsk->icsk_retransmits) { tp->total_rto_recoveries++; tp->rto_stamp = tcp_time_stamp_ms(tp); } WRITE_ONCE(icsk->icsk_retransmits, icsk->icsk_retransmits + 1); tp->total_rto++; } /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. */ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_retries; req->rsk_ops->syn_ack_timeout(req); /* Add one more retry for fastopen. * Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */ max_retries = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; } /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */ if (icsk->icsk_retransmits == 1) tcp_enter_loss(sk); /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error * returned from rtx_syn_ack() to make it more persistent like * regular retransmit because if the child socket has been accepted * it's not good to give up too easily. */ tcp_rtx_synack(sk, req); req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp_ts(tp); tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, req->timeout << req->num_timeout, false); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct sk_buff *skb, u32 rtx_delta) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); int timeout = tcp_rto_max(sk) * 2; s32 rcv_delta; if (user_timeout) { /* If user application specified a TCP_USER_TIMEOUT, * it does not want win 0 packets to 'reset the timer' * while retransmits are not making progress. */ if (rtx_delta > user_timeout) return true; timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); } /* Note: timer interrupt might have been delayed by at least one jiffy, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; return msecs_to_jiffies(rtx_delta) > timeout; } /** * tcp_retransmit_timer() - The TCP retransmit timeout handler * @sk: Pointer to the current socket. * * This function gets called when the kernel timer for a TCP packet * of this socket expires. * * It handles retransmission, timer adjustment and other necessary measures. * * Returns: Nothing (void) */ void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; struct sk_buff *skb; req = rcu_dereference_protected(tp->fastopen_rsk, lockdep_sock_is_held(sk)); if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); tcp_fastopen_synack_timer(sk, req); /* Before we receive ACK to our SYN-ACK don't retransmit * anything else (e.g., data or FIN segments). */ return; } if (!tp->packets_out) return; skb = tcp_rtx_queue_head(sk); if (WARN_ON_ONCE(!skb)) return; if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits * become zero probes, but we should not timeout this * connection. If the socket is an orphan, time it out, * we cannot allow such beasts to hang infinitely. */ struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); if (tp->tcp_usec_ts) rtx_delta /= USEC_PER_MSEC; if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt, jiffies_to_msecs(jiffies - tp->rcv_tstamp), rtx_delta); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &sk->sk_v6_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt, jiffies_to_msecs(jiffies - tp->rcv_tstamp), rtx_delta); } #endif if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) { tcp_write_err(sk); goto out; } tcp_enter_loss(sk); tcp_retransmit_skb(sk, skb, 1); __sk_dst_reset(sk); goto out_reset_timer; } __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; } else if (icsk->icsk_ca_state == TCP_CA_Loss) { mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) || tp->sacked_out) { if (tcp_is_sack(tp)) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; } if (mib_idx) __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. */ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_RESOURCE_PROBE_INTERVAL, false); goto out; } /* Increase the timeout each time we retransmit. Note that * we do not increase the rtt estimate. rto is initialized * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests * that doubling rto each time is the least we can get away with. * In KA9Q, Karn uses this for the first few times, and then * goes to quadratic. netBSD doubles, but only goes up to *64, * and clamps at 1 to 64 sec afterwards. Note that 120 sec is * defined in the protocol as the maximum possible RTT. I guess * we'll have to use something other than TCP to talk to the * University of Mars. * * PAWS allows us longer timeouts and large windows, so once * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is * used to reset timer, set to 0. Recalculate 'icsk_rto' as this * might be increased if the stream oscillates between thin and thick, * thus the old value might already be too high compared to the value * set by 'tcp_set_rto' in tcp_input.c which resets the rto without * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating * exponential backoff behaviour to avoid continue hammering * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; icsk->icsk_rto = clamp(__tcp_set_rto(tp), tcp_rto_min(sk), tcp_rto_max(sk)); } else if (sk->sk_state != TCP_SYN_SENT || tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { /* Use normal (exponential) backoff unless linear timeouts are * activated. */ icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk)); } tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), false); if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; } /* Called with bottom-half processing disabled. * Called by tcp_write_timer() and tcp_release_cb(). */ void tcp_write_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int event; if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending) return; if (time_after(icsk_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk_timeout(icsk)); return; } tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; switch (event) { case ICSK_TIME_REO_TIMEOUT: tcp_rack_reo_timeout(sk); break; case ICSK_TIME_LOSS_PROBE: tcp_send_loss_probe(sk); break; case ICSK_TIME_RETRANS: smp_store_release(&icsk->icsk_pending, 0); tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: smp_store_release(&icsk->icsk_pending, 0); tcp_probe_timer(sk); break; } } static void tcp_write_timer(struct timer_list *t) { struct inet_connection_sock *icsk = timer_container_of(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; /* Avoid locking the socket when there is no pending event. */ if (!smp_load_acquire(&icsk->icsk_pending)) goto out; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_write_timer_handler(sk); } else { /* delegate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); out: sock_put(sk); } void tcp_syn_ack_timeout(const struct request_sock *req) { struct net *net = read_pnet(&inet_rsk(req)->ireq_net); __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_IPV6_MOD(tcp_syn_ack_timeout); void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } static void tcp_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) tcp_delete_keepalive_timer(sk); } EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); static void tcp_keepalive_timer(struct timer_list *t) { struct sock *sk = timer_container_of(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 elapsed; /* Only process if socket is not in use. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ tcp_reset_keepalive_timer(sk, HZ/20); goto out; } if (sk->sk_state == TCP_LISTEN) { pr_err("Hmm... keepalive on a LISTEN ???\n"); goto out; } tcp_mstamp_refresh(tp); if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (READ_ONCE(tp->linger2) >= 0) { const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; if (tmo > 0) { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; } } tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE); goto death; } if (!sock_flag(sk, SOCK_KEEPOPEN) || ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ if (tp->packets_out || !tcp_write_queue_empty(sk)) goto resched; elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); /* If the TCP_USER_TIMEOUT option is enabled, use that * to determine when to timeout instead. */ if ((user_timeout != 0 && elapsed >= msecs_to_jiffies(user_timeout) && icsk->icsk_probes_out > 0) || (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT); tcp_write_err(sk); goto out; } if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1); elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, * try harder. */ elapsed = TCP_RESOURCE_PROBE_INTERVAL; } } else { /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ elapsed = keepalive_time_when(tp) - elapsed; } resched: tcp_reset_keepalive_timer(sk, elapsed); goto out; death: tcp_done(sk); out: bh_unlock_sock(sk); sock_put(sk); } static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) { struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer); struct sock *sk = (struct sock *)tp; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { if (tp->compressed_ack) { /* Since we have to send one ack finally, * subtract one from tp->compressed_ack to keep * LINUX_MIB_TCPACKCOMPRESSED accurate. */ tp->compressed_ack--; tcp_mstamp_refresh(tp); tcp_send_ack(sk); } } else { if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); return HRTIMER_NORESTART; } void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); hrtimer_setup(&tcp_sk(sk)->pacing_timer, tcp_pace_kick, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_SOFT); hrtimer_setup(&tcp_sk(sk)->compressed_ack_timer, tcp_compressed_ack_kick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_SOFT); } |
| 8 8 8 8 8 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/drivers/mmc/core/host.c * * Copyright (C) 2003 Russell King, All Rights Reserved. * Copyright (C) 2007-2008 Pierre Ossman * Copyright (C) 2010 Linus Walleij * * MMC host class device management */ #include <linux/device.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/of.h> #include <linux/pagemap.h> #include <linux/export.h> #include <linux/leds.h> #include <linux/slab.h> #include <linux/mmc/host.h> #include <linux/mmc/card.h> #include <linux/mmc/slot-gpio.h> #include "core.h" #include "crypto.h" #include "host.h" #include "slot-gpio.h" #include "pwrseq.h" #include "sdio_ops.h" #define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) static DEFINE_IDA(mmc_host_ida); #ifdef CONFIG_PM_SLEEP static int mmc_host_class_prepare(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); /* * It's safe to access the bus_ops pointer, as both userspace and the * workqueue for detecting cards are frozen at this point. */ if (!host->bus_ops) return 0; /* Validate conditions for system suspend. */ if (host->bus_ops->pre_suspend) return host->bus_ops->pre_suspend(host); return 0; } static void mmc_host_class_complete(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); _mmc_detect_change(host, 0, false); } static const struct dev_pm_ops mmc_host_class_dev_pm_ops = { .prepare = mmc_host_class_prepare, .complete = mmc_host_class_complete, }; #define MMC_HOST_CLASS_DEV_PM_OPS (&mmc_host_class_dev_pm_ops) #else #define MMC_HOST_CLASS_DEV_PM_OPS NULL #endif static void mmc_host_classdev_release(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); wakeup_source_unregister(host->ws); if (of_alias_get_id(host->parent->of_node, "mmc") < 0) ida_free(&mmc_host_ida, host->index); kfree(host); } static int mmc_host_classdev_shutdown(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); __mmc_stop_host(host); return 0; } static const struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; int mmc_register_host_class(void) { return class_register(&mmc_host_class); } void mmc_unregister_host_class(void) { class_unregister(&mmc_host_class); } /** * mmc_retune_enable() - enter a transfer mode that requires retuning * @host: host which should retune now */ void mmc_retune_enable(struct mmc_host *host) { host->can_retune = 1; if (host->retune_period) mod_timer(&host->retune_timer, jiffies + host->retune_period * HZ); } /* * Pause re-tuning for a small set of operations. The pause begins after the * next command. */ void mmc_retune_pause(struct mmc_host *host) { if (!host->retune_paused) { host->retune_paused = 1; mmc_retune_hold(host); } } EXPORT_SYMBOL(mmc_retune_pause); void mmc_retune_unpause(struct mmc_host *host) { if (host->retune_paused) { host->retune_paused = 0; mmc_retune_release(host); } } EXPORT_SYMBOL(mmc_retune_unpause); /** * mmc_retune_disable() - exit a transfer mode that requires retuning * @host: host which should not retune anymore * * It is not meant for temporarily preventing retuning! */ void mmc_retune_disable(struct mmc_host *host) { mmc_retune_unpause(host); host->can_retune = 0; timer_delete_sync(&host->retune_timer); mmc_retune_clear(host); } void mmc_retune_timer_stop(struct mmc_host *host) { timer_delete_sync(&host->retune_timer); } EXPORT_SYMBOL(mmc_retune_timer_stop); void mmc_retune_hold(struct mmc_host *host) { if (!host->hold_retune) host->retune_now = 1; host->hold_retune += 1; } void mmc_retune_release(struct mmc_host *host) { if (host->hold_retune) host->hold_retune -= 1; else WARN_ON(1); } EXPORT_SYMBOL(mmc_retune_release); int mmc_retune(struct mmc_host *host) { bool return_to_hs400 = false; int err; if (host->retune_now) host->retune_now = 0; else return 0; if (!host->need_retune || host->doing_retune || !host->card) return 0; host->need_retune = 0; host->doing_retune = 1; if (host->ios.timing == MMC_TIMING_MMC_HS400) { err = mmc_hs400_to_hs200(host->card); if (err) goto out; return_to_hs400 = true; } err = mmc_execute_tuning(host->card); if (err) goto out; if (return_to_hs400) err = mmc_hs200_to_hs400(host->card); out: host->doing_retune = 0; return err; } static void mmc_retune_timer(struct timer_list *t) { struct mmc_host *host = timer_container_of(host, t, retune_timer); mmc_retune_needed(host); } static void mmc_of_parse_timing_phase(struct device *dev, const char *prop, struct mmc_clk_phase *phase) { int degrees[2] = {0}; int rc; rc = device_property_read_u32_array(dev, prop, degrees, 2); phase->valid = !rc; if (phase->valid) { phase->in_deg = degrees[0]; phase->out_deg = degrees[1]; } } void mmc_of_parse_clk_phase(struct device *dev, struct mmc_clk_phase_map *map) { mmc_of_parse_timing_phase(dev, "clk-phase-legacy", &map->phase[MMC_TIMING_LEGACY]); mmc_of_parse_timing_phase(dev, "clk-phase-mmc-hs", &map->phase[MMC_TIMING_MMC_HS]); mmc_of_parse_timing_phase(dev, "clk-phase-sd-hs", &map->phase[MMC_TIMING_SD_HS]); mmc_of_parse_timing_phase(dev, "clk-phase-uhs-sdr12", &map->phase[MMC_TIMING_UHS_SDR12]); mmc_of_parse_timing_phase(dev, "clk-phase-uhs-sdr25", &map->phase[MMC_TIMING_UHS_SDR25]); mmc_of_parse_timing_phase(dev, "clk-phase-uhs-sdr50", &map->phase[MMC_TIMING_UHS_SDR50]); mmc_of_parse_timing_phase(dev, "clk-phase-uhs-sdr104", &map->phase[MMC_TIMING_UHS_SDR104]); mmc_of_parse_timing_phase(dev, "clk-phase-uhs-ddr50", &map->phase[MMC_TIMING_UHS_DDR50]); mmc_of_parse_timing_phase(dev, "clk-phase-mmc-ddr52", &map->phase[MMC_TIMING_MMC_DDR52]); mmc_of_parse_timing_phase(dev, "clk-phase-mmc-hs200", &map->phase[MMC_TIMING_MMC_HS200]); mmc_of_parse_timing_phase(dev, "clk-phase-mmc-hs400", &map->phase[MMC_TIMING_MMC_HS400]); } EXPORT_SYMBOL(mmc_of_parse_clk_phase); /** * mmc_of_parse() - parse host's device properties * @host: host whose properties should be parsed. * * To keep the rest of the MMC subsystem unaware of whether DT has been * used to instantiate and configure this host instance or not, we * parse the properties and set respective generic mmc-host flags and * parameters. */ int mmc_of_parse(struct mmc_host *host) { struct device *dev = host->parent; u32 bus_width, drv_type, cd_debounce_delay_ms; int ret; if (!dev || !dev_fwnode(dev)) return 0; /* "bus-width" is translated to MMC_CAP_*_BIT_DATA flags */ if (device_property_read_u32(dev, "bus-width", &bus_width) < 0) { dev_dbg(host->parent, "\"bus-width\" property is missing, assuming 1 bit.\n"); bus_width = 1; } switch (bus_width) { case 8: host->caps |= MMC_CAP_8_BIT_DATA; fallthrough; /* Hosts capable of 8-bit can also do 4 bits */ case 4: host->caps |= MMC_CAP_4_BIT_DATA; break; case 1: break; default: dev_err(host->parent, "Invalid \"bus-width\" value %u!\n", bus_width); return -EINVAL; } /* f_max is obtained from the optional "max-frequency" property */ device_property_read_u32(dev, "max-frequency", &host->f_max); device_property_read_u32(dev, "max-sd-hs-hz", &host->max_sd_hs_hz); /* * Configure CD and WP pins. They are both by default active low to * match the SDHCI spec. If GPIOs are provided for CD and / or WP, the * mmc-gpio helpers are used to attach, configure and use them. If * polarity inversion is specified in DT, one of MMC_CAP2_CD_ACTIVE_HIGH * and MMC_CAP2_RO_ACTIVE_HIGH capability-2 flags is set. If the * "broken-cd" property is provided, the MMC_CAP_NEEDS_POLL capability * is set. If the "non-removable" property is found, the * MMC_CAP_NONREMOVABLE capability is set and no card-detection * configuration is performed. */ /* Parse Card Detection */ if (device_property_read_bool(dev, "non-removable")) { host->caps |= MMC_CAP_NONREMOVABLE; } else { if (device_property_read_bool(dev, "cd-inverted")) host->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH; if (device_property_read_u32(dev, "cd-debounce-delay-ms", &cd_debounce_delay_ms)) cd_debounce_delay_ms = 200; if (device_property_read_bool(dev, "broken-cd")) host->caps |= MMC_CAP_NEEDS_POLL; ret = mmc_gpiod_request_cd(host, "cd", 0, false, cd_debounce_delay_ms * 1000); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); else if (ret != -ENOENT && ret != -ENOSYS) return ret; } /* Parse Write Protection */ if (device_property_read_bool(dev, "wp-inverted")) host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; ret = mmc_gpiod_request_ro(host, "wp", 0, 0); if (!ret) dev_info(host->parent, "Got WP GPIO\n"); else if (ret != -ENOENT && ret != -ENOSYS) return ret; if (device_property_read_bool(dev, "disable-wp")) host->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; if (device_property_read_bool(dev, "cap-sd-highspeed")) host->caps |= MMC_CAP_SD_HIGHSPEED; if (device_property_read_bool(dev, "cap-mmc-highspeed")) host->caps |= MMC_CAP_MMC_HIGHSPEED; if (device_property_read_bool(dev, "sd-uhs-sdr12")) host->caps |= MMC_CAP_UHS_SDR12; if (device_property_read_bool(dev, "sd-uhs-sdr25")) host->caps |= MMC_CAP_UHS_SDR25; if (device_property_read_bool(dev, "sd-uhs-sdr50")) host->caps |= MMC_CAP_UHS_SDR50; if (device_property_read_bool(dev, "sd-uhs-sdr104")) host->caps |= MMC_CAP_UHS_SDR104; if (device_property_read_bool(dev, "sd-uhs-ddr50")) host->caps |= MMC_CAP_UHS_DDR50; if (device_property_read_bool(dev, "cap-power-off-card")) host->caps |= MMC_CAP_POWER_OFF_CARD; if (device_property_read_bool(dev, "cap-mmc-hw-reset")) host->caps |= MMC_CAP_HW_RESET; if (device_property_read_bool(dev, "cap-sdio-irq")) host->caps |= MMC_CAP_SDIO_IRQ; if (device_property_read_bool(dev, "full-pwr-cycle")) host->caps2 |= MMC_CAP2_FULL_PWR_CYCLE; if (device_property_read_bool(dev, "full-pwr-cycle-in-suspend")) host->caps2 |= MMC_CAP2_FULL_PWR_CYCLE_IN_SUSPEND; if (device_property_read_bool(dev, "keep-power-in-suspend")) host->pm_caps |= MMC_PM_KEEP_POWER; if (device_property_read_bool(dev, "wakeup-source") || device_property_read_bool(dev, "enable-sdio-wakeup")) /* legacy */ host->pm_caps |= MMC_PM_WAKE_SDIO_IRQ; if (device_property_read_bool(dev, "mmc-ddr-3_3v")) host->caps |= MMC_CAP_3_3V_DDR; if (device_property_read_bool(dev, "mmc-ddr-1_8v")) host->caps |= MMC_CAP_1_8V_DDR; if (device_property_read_bool(dev, "mmc-ddr-1_2v")) host->caps |= MMC_CAP_1_2V_DDR; if (device_property_read_bool(dev, "mmc-hs200-1_8v")) host->caps2 |= MMC_CAP2_HS200_1_8V_SDR; if (device_property_read_bool(dev, "mmc-hs200-1_2v")) host->caps2 |= MMC_CAP2_HS200_1_2V_SDR; if (device_property_read_bool(dev, "mmc-hs400-1_8v")) host->caps2 |= MMC_CAP2_HS400_1_8V | MMC_CAP2_HS200_1_8V_SDR; if (device_property_read_bool(dev, "mmc-hs400-1_2v")) host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR; if (device_property_read_bool(dev, "mmc-hs400-enhanced-strobe")) host->caps2 |= MMC_CAP2_HS400_ES; if (device_property_read_bool(dev, "no-sdio")) host->caps2 |= MMC_CAP2_NO_SDIO; if (device_property_read_bool(dev, "no-sd")) host->caps2 |= MMC_CAP2_NO_SD; if (device_property_read_bool(dev, "no-mmc")) host->caps2 |= MMC_CAP2_NO_MMC; if (device_property_read_bool(dev, "no-mmc-hs400")) host->caps2 &= ~(MMC_CAP2_HS400_1_8V | MMC_CAP2_HS400_1_2V | MMC_CAP2_HS400_ES); /* Must be after "non-removable" check */ if (device_property_read_u32(dev, "fixed-emmc-driver-type", &drv_type) == 0) { if (host->caps & MMC_CAP_NONREMOVABLE) host->fixed_drv_type = drv_type; else dev_err(host->parent, "can't use fixed driver type, media is removable\n"); } host->dsr_req = !device_property_read_u32(dev, "dsr", &host->dsr); if (host->dsr_req && (host->dsr & ~0xffff)) { dev_err(host->parent, "device tree specified broken value for DSR: 0x%x, ignoring\n", host->dsr); host->dsr_req = 0; } device_property_read_u32(dev, "post-power-on-delay-ms", &host->ios.power_delay_ms); return mmc_pwrseq_alloc(host); } EXPORT_SYMBOL(mmc_of_parse); /** * mmc_of_parse_voltage - return mask of supported voltages * @host: host whose properties should be parsed. * @mask: mask of voltages available for MMC/SD/SDIO * * Parse the "voltage-ranges" property, returning zero if it is not * found, negative errno if the voltage-range specification is invalid, * or one if the voltage-range is specified and successfully parsed. */ int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask) { const char *prop = "voltage-ranges"; struct device *dev = host->parent; u32 *voltage_ranges; int num_ranges, i; int ret; if (!device_property_present(dev, prop)) { dev_dbg(dev, "%s unspecified\n", prop); return 0; } ret = device_property_count_u32(dev, prop); if (ret < 0) return ret; num_ranges = ret / 2; if (!num_ranges) { dev_err(dev, "%s empty\n", prop); return -EINVAL; } voltage_ranges = kcalloc(2 * num_ranges, sizeof(*voltage_ranges), GFP_KERNEL); if (!voltage_ranges) return -ENOMEM; ret = device_property_read_u32_array(dev, prop, voltage_ranges, 2 * num_ranges); if (ret) { kfree(voltage_ranges); return ret; } for (i = 0; i < num_ranges; i++) { const int j = i * 2; u32 ocr_mask; ocr_mask = mmc_vddrange_to_ocrmask(voltage_ranges[j + 0], voltage_ranges[j + 1]); if (!ocr_mask) { dev_err(dev, "range #%d in %s is invalid\n", i, prop); kfree(voltage_ranges); return -EINVAL; } *mask |= ocr_mask; } kfree(voltage_ranges); return 1; } EXPORT_SYMBOL(mmc_of_parse_voltage); /** * mmc_first_nonreserved_index() - get the first index that is not reserved */ static int mmc_first_nonreserved_index(void) { int max; max = of_alias_get_highest_id("mmc"); if (max < 0) return 0; return max + 1; } /** * mmc_alloc_host - initialise the per-host structure. * @extra: sizeof private data structure * @dev: pointer to host device model structure * * Initialise the per-host structure. */ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) { int index; struct mmc_host *host; int alias_id, min_idx, max_idx; host = kzalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); if (!host) return NULL; /* scanning will be enabled when we're ready */ host->rescan_disable = 1; alias_id = of_alias_get_id(dev->of_node, "mmc"); if (alias_id >= 0) { index = alias_id; } else { min_idx = mmc_first_nonreserved_index(); max_idx = 0; index = ida_alloc_range(&mmc_host_ida, min_idx, max_idx - 1, GFP_KERNEL); if (index < 0) { kfree(host); return NULL; } } host->index = index; dev_set_name(&host->class_dev, "mmc%d", host->index); host->ws = wakeup_source_register(NULL, dev_name(&host->class_dev)); host->parent = dev; host->class_dev.parent = dev; host->class_dev.class = &mmc_host_class; device_initialize(&host->class_dev); device_enable_async_suspend(&host->class_dev); if (mmc_gpio_alloc(host)) { put_device(&host->class_dev); return NULL; } spin_lock_init(&host->lock); init_waitqueue_head(&host->wq); INIT_DELAYED_WORK(&host->detect, mmc_rescan); INIT_WORK(&host->sdio_irq_work, sdio_irq_work); timer_setup(&host->retune_timer, mmc_retune_timer, 0); INIT_WORK(&host->supply.uv_work, mmc_undervoltage_workfn); /* * By default, hosts do not support SGIO or large requests. * They have to set these according to their abilities. */ host->max_segs = 1; host->max_seg_size = PAGE_SIZE; host->max_req_size = PAGE_SIZE; host->max_blk_size = 512; host->max_blk_count = PAGE_SIZE / 512; host->fixed_drv_type = -EINVAL; host->ios.power_delay_ms = 10; host->ios.power_mode = MMC_POWER_UNDEFINED; return host; } EXPORT_SYMBOL(mmc_alloc_host); static void devm_mmc_host_release(struct device *dev, void *res) { mmc_free_host(*(struct mmc_host **)res); } struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra) { struct mmc_host **dr, *host; dr = devres_alloc(devm_mmc_host_release, sizeof(*dr), GFP_KERNEL); if (!dr) return NULL; host = mmc_alloc_host(extra, dev); if (!host) { devres_free(dr); return NULL; } *dr = host; devres_add(dev, dr); return host; } EXPORT_SYMBOL(devm_mmc_alloc_host); static int mmc_validate_host_caps(struct mmc_host *host) { struct device *dev = host->parent; u32 caps = host->caps, caps2 = host->caps2; if (caps & MMC_CAP_SDIO_IRQ && !host->ops->enable_sdio_irq) { dev_warn(dev, "missing ->enable_sdio_irq() ops\n"); return -EINVAL; } if (caps2 & (MMC_CAP2_HS400_ES | MMC_CAP2_HS400) && !(caps & MMC_CAP_8_BIT_DATA) && !(caps2 & MMC_CAP2_NO_MMC)) { dev_warn(dev, "drop HS400 support since no 8-bit bus\n"); host->caps2 = caps2 & ~MMC_CAP2_HS400_ES & ~MMC_CAP2_HS400; } return 0; } /** * mmc_add_host - initialise host hardware * @host: mmc host * * Register the host with the driver model. The host must be * prepared to start servicing requests before this function * completes. */ int mmc_add_host(struct mmc_host *host) { int err; err = mmc_validate_host_caps(host); if (err) return err; err = device_add(&host->class_dev); if (err) return err; led_trigger_register_simple(dev_name(&host->class_dev), &host->led); mmc_add_host_debugfs(host); mmc_start_host(host); return 0; } EXPORT_SYMBOL(mmc_add_host); /** * mmc_remove_host - remove host hardware * @host: mmc host * * Unregister and remove all cards associated with this host, * and power down the MMC bus. No new requests will be issued * after this function has returned. */ void mmc_remove_host(struct mmc_host *host) { mmc_stop_host(host); mmc_remove_host_debugfs(host); device_del(&host->class_dev); led_trigger_unregister_simple(host->led); } EXPORT_SYMBOL(mmc_remove_host); /** * mmc_free_host - free the host structure * @host: mmc host * * Free the host once all references to it have been dropped. */ void mmc_free_host(struct mmc_host *host) { cancel_delayed_work_sync(&host->detect); mmc_pwrseq_free(host); put_device(&host->class_dev); } EXPORT_SYMBOL(mmc_free_host); |
| 6 1 2 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_fib.h> #include <net/flow.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/route.h> /* don't try to find route from mcast/bcast/zeronet */ static __be32 get_saddr(__be32 addr) { if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || ipv4_is_zeronet(addr)) return 0; return addr; } void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dst = ®s->data[priv->dreg]; const struct net_device *dev = NULL; struct iphdr *iph, _iph; __be32 addr; if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } if (priv->flags & NFTA_FIB_F_DADDR) addr = iph->daddr; else addr = iph->saddr; if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) { *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); return; } *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr); } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; struct iphdr *iph, _iph; struct fib_result res; struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_proto = pkt->tprot, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), }; const struct net_device *oif; const struct net_device *found; if (nft_fib_can_skip(pkt)) { nft_fib_store_result(dest, priv, nft_in(pkt)); return; } /* * Do not set flowi4_oif, it restricts results (for example, asking * for oif 3 will get RTN_UNICAST result even if the daddr exits * on another interface. * * Search results for the desired outinterface instead. */ if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); else if (priv->flags & NFTA_FIB_F_IIF) oif = nft_in(pkt); else oif = NULL; fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif); iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; return; } if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { nft_fib_store_result(dest, priv, pkt->skb->dev); return; } } if (priv->flags & NFTA_FIB_F_MARK) fl4.flowi4_mark = pkt->skb->mark; fl4.flowi4_dscp = ip4h_dscp(iph); if (priv->flags & NFTA_FIB_F_DADDR) { fl4.daddr = iph->daddr; fl4.saddr = get_saddr(iph->saddr); } else { if (nft_hook(pkt) == NF_INET_FORWARD && priv->flags & NFTA_FIB_F_IIF) fl4.flowi4_iif = nft_out(pkt)->ifindex; fl4.daddr = iph->saddr; fl4.saddr = get_saddr(iph->daddr); } *dest = 0; if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return; switch (res.type) { case RTN_UNICAST: break; case RTN_LOCAL: /* Should not see RTN_LOCAL here */ return; default: break; } if (!oif) { found = FIB_RES_DEV(res); } else { if (!fib_info_nh_uses_dev(res.fi, oif)) return; found = oif; } nft_fib_store_result(dest, priv, found); } EXPORT_SYMBOL_GPL(nft_fib4_eval); static struct nft_expr_type nft_fib4_type; static const struct nft_expr_ops nft_fib4_type_ops = { .type = &nft_fib4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib4_eval_type, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib4_ops = { .type = &nft_fib4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), .eval = nft_fib4_eval, .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * nft_fib4_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { enum nft_fib_result result; if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: return &nft_fib4_ops; case NFT_FIB_RESULT_OIFNAME: return &nft_fib4_ops; case NFT_FIB_RESULT_ADDRTYPE: return &nft_fib4_type_ops; default: return ERR_PTR(-EOPNOTSUPP); } } static struct nft_expr_type nft_fib4_type __read_mostly = { .name = "fib", .select_ops = nft_fib4_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV4, .owner = THIS_MODULE, }; static int __init nft_fib4_module_init(void) { return nft_register_expr(&nft_fib4_type); } static void __exit nft_fib4_module_exit(void) { nft_unregister_expr(&nft_fib4_type); } module_init(nft_fib4_module_init); module_exit(nft_fib4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); MODULE_DESCRIPTION("nftables fib / ip route lookup support"); |
| 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 | // SPDX-License-Identifier: GPL-2.0-or-later /* net/sched/sch_teql.c "True" (or "trivial") link equalizer. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/moduleparam.h> #include <net/dst.h> #include <net/neighbour.h> #include <net/pkt_sched.h> /* How to setup it. ---------------- After loading this module you will find a new device teqlN and new qdisc with the same name. To join a slave to the equalizer you should just set this qdisc on a device f.e. # tc qdisc add dev eth0 root teql0 # tc qdisc add dev eth1 root teql0 That's all. Full PnP 8) Applicability. -------------- 1. Slave devices MUST be active devices, i.e., they must raise the tbusy signal and generate EOI events. If you want to equalize virtual devices like tunnels, use a normal eql device. 2. This device puts no limitations on physical slave characteristics f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-) Certainly, large difference in link speeds will make the resulting eqalized link unusable, because of huge packet reordering. I estimate an upper useful difference as ~10 times. 3. If the slave requires address resolution, only protocols using neighbour cache (IPv4/IPv6) will work over the equalized link. Other protocols are still allowed to use the slave device directly, which will not break load balancing, though native slave traffic will have the highest priority. */ struct teql_master { struct Qdisc_ops qops; struct net_device *dev; struct Qdisc *slaves; struct list_head master_list; unsigned long tx_bytes; unsigned long tx_packets; unsigned long tx_errors; unsigned long tx_dropped; }; struct teql_sched_data { struct Qdisc *next; struct teql_master *m; struct sk_buff_head q; }; #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next) #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT) /* "teql*" qdisc routines */ static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) { __skb_queue_tail(&q->q, skb); return NET_XMIT_SUCCESS; } return qdisc_drop(skb, sch, to_free); } static struct sk_buff * teql_dequeue(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); q = rcu_dereference_bh(dat_queue->qdisc); if (skb == NULL) { struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } else { qdisc_bstats_update(sch, skb); } sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } static struct sk_buff * teql_peek(struct Qdisc *sch) { /* teql is meant to be used as root qdisc */ return NULL; } static void teql_reset(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); skb_queue_purge(&dat->q); } static void teql_destroy(struct Qdisc *sch) { struct Qdisc *q, *prev; struct teql_sched_data *dat = qdisc_priv(sch); struct teql_master *master = dat->m; if (!master) return; prev = master->slaves; if (prev) { do { q = NEXT_SLAVE(prev); if (q == sch) { NEXT_SLAVE(prev) = NEXT_SLAVE(q); if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { struct netdev_queue *txq; spinlock_t *root_lock; txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } skb_queue_purge(&dat->q); break; } } while ((prev = q) != master->slaves); } } static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master *)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); if (dev->hard_header_len > m->dev->hard_header_len) return -EINVAL; if (m->dev == dev) return -ELOOP; q->m = m; skb_queue_head_init(&q->q); if (m->slaves) { if (m->dev->flags & IFF_UP) { if ((m->dev->flags & IFF_POINTOPOINT && !(dev->flags & IFF_POINTOPOINT)) || (m->dev->flags & IFF_BROADCAST && !(dev->flags & IFF_BROADCAST)) || (m->dev->flags & IFF_MULTICAST && !(dev->flags & IFF_MULTICAST)) || dev->mtu < m->dev->mtu) return -EINVAL; } else { if (!(dev->flags&IFF_POINTOPOINT)) m->dev->flags &= ~IFF_POINTOPOINT; if (!(dev->flags&IFF_BROADCAST)) m->dev->flags &= ~IFF_BROADCAST; if (!(dev->flags&IFF_MULTICAST)) m->dev->flags &= ~IFF_MULTICAST; if (dev->mtu < m->dev->mtu) m->dev->mtu = dev->mtu; } q->next = NEXT_SLAVE(m->slaves); NEXT_SLAVE(m->slaves) = sch; } else { q->next = sch; m->slaves = sch; m->dev->mtu = dev->mtu; m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK); } return 0; } static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq, struct dst_entry *dst) { struct neighbour *n; int err = 0; n = dst_neigh_lookup_skb(dst, skb); if (!n) return -ENOENT; if (dst->dev != dev) { struct neighbour *mn; mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev); neigh_release(n); if (IS_ERR(mn)) return PTR_ERR(mn); n = mn; } if (neigh_event_send(n, skb_res) == 0) { int err; char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), haddr, NULL, skb->len); if (err < 0) err = -EINVAL; } else { err = (skb_res == NULL) ? -EAGAIN : 1; } neigh_release(n); return err; } static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq) { struct dst_entry *dst = skb_dst(skb); int res; if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) return 0; rcu_read_lock(); res = __teql_resolve(skb, skb_res, dev, txq, dst); rcu_read_unlock(); return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct Qdisc *start, *q; int busy; int nores; int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; start = master->slaves; restart: nores = 0; busy = 0; q = start; if (!q) goto drop; do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q) continue; if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { busy = 1; continue; } switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && netdev_start_xmit(skb, slave, slave_txq, false) == NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->tx_packets++; master->tx_bytes += length; return NETDEV_TX_OK; } __netif_tx_unlock(slave_txq); } if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0))) busy = 1; break; case 1: master->slaves = NEXT_SLAVE(q); return NETDEV_TX_OK; default: nores = 1; break; } __skb_pull(skb, skb_network_offset(skb)); } while ((q = NEXT_SLAVE(q)) != start); if (nores && skb_res == NULL) { skb_res = skb; goto restart; } if (busy) { netif_stop_queue(dev); return NETDEV_TX_BUSY; } master->tx_errors++; drop: master->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } static int teql_master_open(struct net_device *dev) { struct Qdisc *q; struct teql_master *m = netdev_priv(dev); int mtu = 0xFFFE; unsigned int flags = IFF_NOARP | IFF_MULTICAST; if (m->slaves == NULL) return -EUNATCH; flags = FMASK; q = m->slaves; do { struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; if (slave->mtu < mtu) mtu = slave->mtu; if (slave->hard_header_len > LL_MAX_HEADER) return -EINVAL; /* If all the slaves are BROADCAST, master is BROADCAST If all the slaves are PtP, master is PtP Otherwise, master is NBMA. */ if (!(slave->flags&IFF_POINTOPOINT)) flags &= ~IFF_POINTOPOINT; if (!(slave->flags&IFF_BROADCAST)) flags &= ~IFF_BROADCAST; if (!(slave->flags&IFF_MULTICAST)) flags &= ~IFF_MULTICAST; } while ((q = NEXT_SLAVE(q)) != m->slaves); m->dev->mtu = mtu; m->dev->flags = (m->dev->flags&~FMASK) | flags; netif_start_queue(m->dev); return 0; } static int teql_master_close(struct net_device *dev) { netif_stop_queue(dev); return 0; } static void teql_master_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct teql_master *m = netdev_priv(dev); stats->tx_packets = m->tx_packets; stats->tx_bytes = m->tx_bytes; stats->tx_errors = m->tx_errors; stats->tx_dropped = m->tx_dropped; } static int teql_master_mtu(struct net_device *dev, int new_mtu) { struct teql_master *m = netdev_priv(dev); struct Qdisc *q; q = m->slaves; if (q) { do { if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q = NEXT_SLAVE(q)) != m->slaves); } WRITE_ONCE(dev->mtu, new_mtu); return 0; } static const struct net_device_ops teql_netdev_ops = { .ndo_open = teql_master_open, .ndo_stop = teql_master_close, .ndo_start_xmit = teql_master_xmit, .ndo_get_stats64 = teql_master_stats64, .ndo_change_mtu = teql_master_mtu, }; static __init void teql_master_setup(struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct Qdisc_ops *ops = &master->qops; master->dev = dev; ops->priv_size = sizeof(struct teql_sched_data); ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; ops->peek = teql_peek; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; ops->owner = THIS_MODULE; dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; dev->min_mtu = 68; dev->max_mtu = 65535; dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; netif_keep_dst(dev); } static LIST_HEAD(master_dev_list); static int max_equalizers = 1; module_param(max_equalizers, int, 0); MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers"); static int __init teql_init(void) { int i; int err = -ENODEV; for (i = 0; i < max_equalizers; i++) { struct net_device *dev; struct teql_master *master; dev = alloc_netdev(sizeof(struct teql_master), "teql%d", NET_NAME_UNKNOWN, teql_master_setup); if (!dev) { err = -ENOMEM; break; } if ((err = register_netdev(dev))) { free_netdev(dev); break; } master = netdev_priv(dev); strscpy(master->qops.id, dev->name, IFNAMSIZ); err = register_qdisc(&master->qops); if (err) { unregister_netdev(dev); free_netdev(dev); break; } list_add_tail(&master->master_list, &master_dev_list); } return i ? 0 : err; } static void __exit teql_exit(void) { struct teql_master *master, *nxt; list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) { list_del(&master->master_list); unregister_qdisc(&master->qops); unregister_netdev(master->dev); free_netdev(master->dev); } } module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); |
| 3 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | // SPDX-License-Identifier: GPL-2.0 /* * Ioctl to get a verity file's digest * * Copyright 2019 Google LLC */ #include "fsverity_private.h" #include <linux/bpf.h> #include <linux/btf.h> #include <linux/export.h> #include <linux/uaccess.h> /** * fsverity_ioctl_measure() - get a verity file's digest * @filp: file to get digest of * @_uarg: user pointer to fsverity_digest * * Retrieve the file digest that the kernel is enforcing for reads from a verity * file. See the "FS_IOC_MEASURE_VERITY" section of * Documentation/filesystems/fsverity.rst for the documentation. * * Return: 0 on success, -errno on failure */ int fsverity_ioctl_measure(struct file *filp, void __user *_uarg) { const struct inode *inode = file_inode(filp); struct fsverity_digest __user *uarg = _uarg; const struct fsverity_info *vi; const struct fsverity_hash_alg *hash_alg; struct fsverity_digest arg; vi = fsverity_get_info(inode); if (!vi) return -ENODATA; /* not a verity file */ hash_alg = vi->tree_params.hash_alg; /* * The user specifies the digest_size their buffer has space for; we can * return the digest if it fits in the available space. We write back * the actual size, which may be shorter than the user-specified size. */ if (get_user(arg.digest_size, &uarg->digest_size)) return -EFAULT; if (arg.digest_size < hash_alg->digest_size) return -EOVERFLOW; memset(&arg, 0, sizeof(arg)); arg.digest_algorithm = hash_alg - fsverity_hash_algs; arg.digest_size = hash_alg->digest_size; if (copy_to_user(uarg, &arg, sizeof(arg))) return -EFAULT; if (copy_to_user(uarg->digest, vi->file_digest, hash_alg->digest_size)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(fsverity_ioctl_measure); /** * fsverity_get_digest() - get a verity file's digest * @inode: inode to get digest of * @raw_digest: (out) the raw file digest * @alg: (out) the digest's algorithm, as a FS_VERITY_HASH_ALG_* value * @halg: (out) the digest's algorithm, as a HASH_ALGO_* value * * Retrieves the fsverity digest of the given file. The file must have been * opened at least once since the inode was last loaded into the inode cache; * otherwise this function will not recognize when fsverity is enabled. * * The file's fsverity digest consists of @raw_digest in combination with either * @alg or @halg. (The caller can choose which one of @alg or @halg to use.) * * IMPORTANT: Callers *must* make use of one of the two algorithm IDs, since * @raw_digest is meaningless without knowing which algorithm it uses! fsverity * provides no security guarantee for users who ignore the algorithm ID, even if * they use the digest size (since algorithms can share the same digest size). * * Return: The size of the raw digest in bytes, or 0 if the file doesn't have * fsverity enabled. */ int fsverity_get_digest(struct inode *inode, u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE], u8 *alg, enum hash_algo *halg) { const struct fsverity_info *vi; const struct fsverity_hash_alg *hash_alg; vi = fsverity_get_info(inode); if (!vi) return 0; /* not a verity file */ hash_alg = vi->tree_params.hash_alg; memcpy(raw_digest, vi->file_digest, hash_alg->digest_size); if (alg) *alg = hash_alg - fsverity_hash_algs; if (halg) *halg = hash_alg->algo_id; return hash_alg->digest_size; } EXPORT_SYMBOL_GPL(fsverity_get_digest); #ifdef CONFIG_BPF_SYSCALL /* bpf kfuncs */ __bpf_kfunc_start_defs(); /** * bpf_get_fsverity_digest: read fsverity digest of file * @file: file to get digest from * @digest_p: (out) dynptr for struct fsverity_digest * * Read fsverity_digest of *file* into *digest_ptr*. * * Return: 0 on success, a negative value on error. */ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p) { struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; const struct inode *inode = file_inode(file); u32 dynptr_sz = __bpf_dynptr_size(digest_ptr); struct fsverity_digest *arg; const struct fsverity_info *vi; const struct fsverity_hash_alg *hash_alg; int out_digest_sz; if (dynptr_sz < sizeof(struct fsverity_digest)) return -EINVAL; arg = __bpf_dynptr_data_rw(digest_ptr, dynptr_sz); if (!arg) return -EINVAL; if (!IS_ALIGNED((uintptr_t)arg, __alignof__(*arg))) return -EINVAL; vi = fsverity_get_info(inode); if (!vi) return -ENODATA; /* not a verity file */ hash_alg = vi->tree_params.hash_alg; arg->digest_algorithm = hash_alg - fsverity_hash_algs; arg->digest_size = hash_alg->digest_size; out_digest_sz = dynptr_sz - sizeof(struct fsverity_digest); /* copy digest */ memcpy(arg->digest, vi->file_digest, min_t(int, hash_alg->digest_size, out_digest_sz)); /* fill the extra buffer with zeros */ if (out_digest_sz > hash_alg->digest_size) memset(arg->digest + arg->digest_size, 0, out_digest_sz - hash_alg->digest_size); return 0; } __bpf_kfunc_end_defs(); BTF_KFUNCS_START(fsverity_set_ids) BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS) BTF_KFUNCS_END(fsverity_set_ids) static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id) { if (!btf_id_set8_contains(&fsverity_set_ids, kfunc_id)) return 0; /* Only allow to attach from LSM hooks, to avoid recursion */ return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0; } static const struct btf_kfunc_id_set bpf_fsverity_set = { .owner = THIS_MODULE, .set = &fsverity_set_ids, .filter = bpf_get_fsverity_digest_filter, }; void __init fsverity_init_bpf(void) { register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fsverity_set); } #endif /* CONFIG_BPF_SYSCALL */ |
| 13 10 38 80 50 85 64 52 33 44 30 1 23 23 8 8 8 8 8 8 8 8 8 23 44 27 23 13 35 43 22 1 42 43 10 10 10 10 1 1 19 19 19 19 19 15 15 15 1 1 21 21 21 21 21 21 21 23 23 29 29 29 29 10 6 4 50 48 48 45 47 26 25 25 6 47 47 6 44 3 1 1 41 37 4 1 4 19 34 4 20 40 5 37 12 19 7 7 7 7 7 2 23 23 27 22 25 4 22 27 27 27 27 25 25 22 4 25 25 27 27 21 4 67 1 67 66 69 1 1 67 32 32 31 32 32 32 32 32 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 1 3 7 6 1 7 7 5 2 7 59 59 45 2 12 12 1 11 1 1 12 19 18 16 2 2 2 15 15 16 16 16 1 15 15 1 1 15 16 5 5 5 5 5 1 1 21 1 1 23 23 23 23 5 1 1 22 20 21 20 2 4 1 3 3 8 8 1 7 16 16 8 4 4 3 1 4 24 23 5 23 23 23 21 2 16 16 16 16 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. * * The iopt_pages is the center of the storage and motion of PFNs. Each * iopt_pages represents a logical linear array of full PFNs. The array is 0 * based and has npages in it. Accessors use 'index' to refer to the entry in * this logical array, regardless of its storage location. * * PFNs are stored in a tiered scheme: * 1) iopt_pages::pinned_pfns xarray * 2) An iommu_domain * 3) The origin of the PFNs, i.e. the userspace pointer * * PFN have to be copied between all combinations of tiers, depending on the * configuration. * * When a PFN is taken out of the userspace pointer it is pinned exactly once. * The storage locations of the PFN's index are tracked in the two interval * trees. If no interval includes the index then it is not pinned. * * If access_itree includes the PFN's index then an in-kernel access has * requested the page. The PFN is stored in the xarray so other requestors can * continue to find it. * * If the domains_itree includes the PFN's index then an iommu_domain is storing * the PFN and it can be read back using iommu_iova_to_phys(). To avoid * duplicating storage the xarray is not used if only iommu_domains are using * the PFN's index. * * As a general principle this is designed so that destroy never fails. This * means removing an iommu_domain or releasing a in-kernel access will not fail * due to insufficient memory. In practice this means some cases have to hold * PFNs in the xarray even though they are also being stored in an iommu_domain. * * While the iopt_pages can use an iommu_domain as storage, it does not have an * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages * and reference their own slice of the PFN array, with sub page granularity. * * In this file the term 'last' indicates an inclusive and closed interval, eg * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to * no PFNs. * * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so * last_iova + 1 can overflow. An iopt_pages index will always be much less than * ULONG_MAX so last_index + 1 cannot overflow. */ #include <linux/file.h> #include <linux/highmem.h> #include <linux/iommu.h> #include <linux/iommufd.h> #include <linux/kthread.h> #include <linux/overflow.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include "double_span.h" #include "io_pagetable.h" #ifndef CONFIG_IOMMUFD_TEST #define TEMP_MEMORY_LIMIT 65536 #else #define TEMP_MEMORY_LIMIT iommufd_test_memory_limit #endif #define BATCH_BACKUP_SIZE 32 /* * More memory makes pin_user_pages() and the batching more efficient, but as * this is only a performance optimization don't try too hard to get it. A 64k * allocation can hold about 26M of 4k pages and 13G of 2M pages in an * pfn_batch. Various destroy paths cannot fail and provide a small amount of * stack memory as a backup contingency. If backup_len is given this cannot * fail. */ static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len) { void *res; if (WARN_ON(*size == 0)) return NULL; if (*size < backup_len) return backup; if (!backup && iommufd_should_fail()) return NULL; *size = min_t(size_t, *size, TEMP_MEMORY_LIMIT); res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (res) return res; *size = PAGE_SIZE; if (backup_len) { res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (res) return res; *size = backup_len; return backup; } return kmalloc(*size, GFP_KERNEL); } void interval_tree_double_span_iter_update( struct interval_tree_double_span_iter *iter) { unsigned long last_hole = ULONG_MAX; unsigned int i; for (i = 0; i != ARRAY_SIZE(iter->spans); i++) { if (interval_tree_span_iter_done(&iter->spans[i])) { iter->is_used = -1; return; } if (iter->spans[i].is_hole) { last_hole = min(last_hole, iter->spans[i].last_hole); continue; } iter->is_used = i + 1; iter->start_used = iter->spans[i].start_used; iter->last_used = min(iter->spans[i].last_used, last_hole); return; } iter->is_used = 0; iter->start_hole = iter->spans[0].start_hole; iter->last_hole = min(iter->spans[0].last_hole, iter->spans[1].last_hole); } void interval_tree_double_span_iter_first( struct interval_tree_double_span_iter *iter, struct rb_root_cached *itree1, struct rb_root_cached *itree2, unsigned long first_index, unsigned long last_index) { unsigned int i; iter->itrees[0] = itree1; iter->itrees[1] = itree2; for (i = 0; i != ARRAY_SIZE(iter->spans); i++) interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i], first_index, last_index); interval_tree_double_span_iter_update(iter); } void interval_tree_double_span_iter_next( struct interval_tree_double_span_iter *iter) { unsigned int i; if (iter->is_used == -1 || iter->last_hole == iter->spans[0].last_index) { iter->is_used = -1; return; } for (i = 0; i != ARRAY_SIZE(iter->spans); i++) interval_tree_span_iter_advance( &iter->spans[i], iter->itrees[i], iter->last_hole + 1); interval_tree_double_span_iter_update(iter); } static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages) { int rc; rc = check_add_overflow(pages->npinned, npages, &pages->npinned); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(rc || pages->npinned > pages->npages); } static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages) { int rc; rc = check_sub_overflow(pages->npinned, npages, &pages->npinned); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(rc || pages->npinned > pages->npages); } static void iopt_pages_err_unpin(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **page_list) { unsigned long npages = last_index - start_index + 1; unpin_user_pages(page_list, npages); iopt_pages_sub_npinned(pages, npages); } /* * index is the number of PAGE_SIZE units from the start of the area's * iopt_pages. If the iova is sub page-size then the area has an iova that * covers a portion of the first and last pages in the range. */ static unsigned long iopt_area_index_to_iova(struct iopt_area *area, unsigned long index) { if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(index < iopt_area_index(area) || index > iopt_area_last_index(area)); index -= iopt_area_index(area); if (index == 0) return iopt_area_iova(area); return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE; } static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area, unsigned long index) { if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(index < iopt_area_index(area) || index > iopt_area_last_index(area)); if (index == iopt_area_last_index(area)) return iopt_area_last_iova(area); return iopt_area_iova(area) - area->page_offset + (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1; } static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova, size_t size) { size_t ret; ret = iommu_unmap(domain, iova, size); /* * It is a logic error in this code or a driver bug if the IOMMU unmaps * something other than exactly as requested. This implies that the * iommu driver may not fail unmap for reasons beyond bad agruments. * Particularly, the iommu driver may not do a memory allocation on the * unmap path. */ WARN_ON(ret != size); } static void iopt_area_unmap_domain_range(struct iopt_area *area, struct iommu_domain *domain, unsigned long start_index, unsigned long last_index) { unsigned long start_iova = iopt_area_index_to_iova(area, start_index); iommu_unmap_nofail(domain, start_iova, iopt_area_index_to_iova_last(area, last_index) - start_iova + 1); } static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages, unsigned long index) { struct interval_tree_node *node; node = interval_tree_iter_first(&pages->domains_itree, index, index); if (!node) return NULL; return container_of(node, struct iopt_area, pages_node); } /* * A simple datastructure to hold a vector of PFNs, optimized for contiguous * PFNs. This is used as a temporary holding memory for shuttling pfns from one * place to another. Generally everything is made more efficient if operations * work on the largest possible grouping of pfns. eg fewer lock/unlock cycles, * better cache locality, etc */ struct pfn_batch { unsigned long *pfns; u32 *npfns; unsigned int array_size; unsigned int end; unsigned int total_pfns; }; static void batch_clear(struct pfn_batch *batch) { batch->total_pfns = 0; batch->end = 0; batch->pfns[0] = 0; batch->npfns[0] = 0; } /* * Carry means we carry a portion of the final hugepage over to the front of the * batch */ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) { if (!keep_pfns) return batch_clear(batch); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(!batch->end || batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns); batch->npfns[0] = keep_pfns; batch->end = 1; } static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns) { if (!batch->total_pfns) return; if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(batch->total_pfns != batch->npfns[0]); skip_pfns = min(batch->total_pfns, skip_pfns); batch->pfns[0] += skip_pfns; batch->npfns[0] -= skip_pfns; batch->total_pfns -= skip_pfns; } static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup, size_t backup_len) { const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns); size_t size = max_pages * elmsz; batch->pfns = temp_kmalloc(&size, backup, backup_len); if (!batch->pfns) return -ENOMEM; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz)) return -EINVAL; batch->array_size = size / elmsz; batch->npfns = (u32 *)(batch->pfns + batch->array_size); batch_clear(batch); return 0; } static int batch_init(struct pfn_batch *batch, size_t max_pages) { return __batch_init(batch, max_pages, NULL, 0); } static void batch_init_backup(struct pfn_batch *batch, size_t max_pages, void *backup, size_t backup_len) { __batch_init(batch, max_pages, backup, backup_len); } static void batch_destroy(struct pfn_batch *batch, void *backup) { if (batch->pfns != backup) kfree(batch->pfns); } static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn, u32 nr) { const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); unsigned int end = batch->end; if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] && nr <= MAX_NPFNS - batch->npfns[end - 1]) { batch->npfns[end - 1] += nr; } else if (end < batch->array_size) { batch->pfns[end] = pfn; batch->npfns[end] = nr; batch->end++; } else { return false; } batch->total_pfns += nr; return true; } static void batch_remove_pfn_num(struct pfn_batch *batch, unsigned long nr) { batch->npfns[batch->end - 1] -= nr; if (batch->npfns[batch->end - 1] == 0) batch->end--; batch->total_pfns -= nr; } /* true if the pfn was added, false otherwise */ static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) { return batch_add_pfn_num(batch, pfn, 1); } /* * Fill the batch with pfns from the domain. When the batch is full, or it * reaches last_index, the function will return. The caller should use * batch->total_pfns to determine the starting point for the next iteration. */ static void batch_from_domain(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index) { unsigned int page_offset = 0; unsigned long iova; phys_addr_t phys; iova = iopt_area_index_to_iova(area, start_index); if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { /* * This is pretty slow, it would be nice to get the page size * back from the driver, or have the driver directly fill the * batch. */ phys = iommu_iova_to_phys(domain, iova) - page_offset; if (!batch_add_pfn(batch, PHYS_PFN(phys))) return; iova += PAGE_SIZE - page_offset; page_offset = 0; start_index++; } } static struct page **raw_pages_from_domain(struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index, struct page **out_pages) { unsigned int page_offset = 0; unsigned long iova; phys_addr_t phys; iova = iopt_area_index_to_iova(area, start_index); if (start_index == iopt_area_index(area)) page_offset = area->page_offset; while (start_index <= last_index) { phys = iommu_iova_to_phys(domain, iova) - page_offset; *(out_pages++) = pfn_to_page(PHYS_PFN(phys)); iova += PAGE_SIZE - page_offset; page_offset = 0; start_index++; } return out_pages; } /* Continues reading a domain until we reach a discontinuity in the pfns. */ static void batch_from_domain_continue(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index, unsigned long last_index) { unsigned int array_size = batch->array_size; batch->array_size = batch->end; batch_from_domain(batch, domain, area, start_index, last_index); batch->array_size = array_size; } /* * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That * mode permits splitting a mapped area up, and then one of the splits is * unmapped. Doing this normally would cause us to violate our invariant of * pairing map/unmap. Thus, to support old VFIO compatibility disable support * for batching consecutive PFNs. All PFNs mapped into the iommu are done in * PAGE_SIZE units, not larger or smaller. */ static int batch_iommu_map_small(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { unsigned long start_iova = iova; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE || size % PAGE_SIZE); while (size) { rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova += PAGE_SIZE; paddr += PAGE_SIZE; size -= PAGE_SIZE; } return 0; err_unmap: if (start_iova != iova) iommu_unmap_nofail(domain, start_iova, iova - start_iova); return rc; } static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, struct iopt_area *area, unsigned long start_index) { bool disable_large_pages = area->iopt->disable_large_pages; unsigned long last_iova = iopt_area_last_iova(area); unsigned int page_offset = 0; unsigned long start_iova; unsigned long next_iova; unsigned int cur = 0; unsigned long iova; int rc; /* The first index might be a partial page */ if (start_index == iopt_area_index(area)) page_offset = area->page_offset; next_iova = iova = start_iova = iopt_area_index_to_iova(area, start_index); while (cur < batch->end) { next_iova = min(last_iova + 1, next_iova + batch->npfns[cur] * PAGE_SIZE - page_offset); if (disable_large_pages) rc = batch_iommu_map_small( domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, next_iova - iova, area->iommu_prot); else rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, next_iova - iova, area->iommu_prot, GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova = next_iova; page_offset = 0; cur++; } return 0; err_unmap: if (start_iova != iova) iommu_unmap_nofail(domain, start_iova, iova - start_iova); return rc; } static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; rcu_read_lock(); while (true) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); if (!batch_add_pfn(batch, xa_to_value(entry)) || start_index == last_index) break; start_index++; } rcu_read_unlock(); } static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; xas_lock(&xas); while (true) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); if (!batch_add_pfn(batch, xa_to_value(entry))) break; xas_store(&xas, NULL); if (start_index == last_index) break; start_index++; } xas_unlock(&xas); } static void clear_xarray(struct xarray *xa, unsigned long start_index, unsigned long last_index) { XA_STATE(xas, xa, start_index); void *entry; xas_lock(&xas); xas_for_each(&xas, entry, last_index) xas_store(&xas, NULL); xas_unlock(&xas); } static int pages_to_xarray(struct xarray *xa, unsigned long start_index, unsigned long last_index, struct page **pages) { struct page **end_pages = pages + (last_index - start_index) + 1; struct page **half_pages = pages + (end_pages - pages) / 2; XA_STATE(xas, xa, start_index); do { void *old; xas_lock(&xas); while (pages != end_pages) { /* xarray does not participate in fault injection */ if (pages == half_pages && iommufd_should_fail()) { xas_set_err(&xas, -EINVAL); xas_unlock(&xas); /* aka xas_destroy() */ xas_nomem(&xas, GFP_KERNEL); goto err_clear; } old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages))); if (xas_error(&xas)) break; WARN_ON(old); pages++; xas_next(&xas); } xas_unlock(&xas); } while (xas_nomem(&xas, GFP_KERNEL)); err_clear: if (xas_error(&xas)) { if (xas.xa_index != start_index) clear_xarray(xa, start_index, xas.xa_index - 1); return xas_error(&xas); } return 0; } static void batch_from_pages(struct pfn_batch *batch, struct page **pages, size_t npages) { struct page **end = pages + npages; for (; pages != end; pages++) if (!batch_add_pfn(batch, page_to_pfn(*pages))) break; } static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p, unsigned long *offset_p, unsigned long npages) { int rc = 0; struct folio **folios = *folios_p; unsigned long offset = *offset_p; while (npages) { struct folio *folio = *folios; unsigned long nr = folio_nr_pages(folio) - offset; unsigned long pfn = page_to_pfn(folio_page(folio, offset)); nr = min(nr, npages); npages -= nr; if (!batch_add_pfn_num(batch, pfn, nr)) break; if (nr > 1) { rc = folio_add_pins(folio, nr - 1); if (rc) { batch_remove_pfn_num(batch, nr); goto out; } } folios++; offset = 0; } out: *folios_p = folios; *offset_p = offset; return rc; } static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages, unsigned int first_page_off, size_t npages) { unsigned int cur = 0; while (first_page_off) { if (batch->npfns[cur] > first_page_off) break; first_page_off -= batch->npfns[cur]; cur++; } while (npages) { size_t to_unpin = min_t(size_t, npages, batch->npfns[cur] - first_page_off); unpin_user_page_range_dirty_lock( pfn_to_page(batch->pfns[cur] + first_page_off), to_unpin, pages->writable); iopt_pages_sub_npinned(pages, to_unpin); cur++; first_page_off = 0; npages -= to_unpin; } } static void copy_data_page(struct page *page, void *data, unsigned long offset, size_t length, unsigned int flags) { void *mem; mem = kmap_local_page(page); if (flags & IOMMUFD_ACCESS_RW_WRITE) { memcpy(mem + offset, data, length); set_page_dirty_lock(page); } else { memcpy(data, mem + offset, length); } kunmap_local(mem); } static unsigned long batch_rw(struct pfn_batch *batch, void *data, unsigned long offset, unsigned long length, unsigned int flags) { unsigned long copied = 0; unsigned int npage = 0; unsigned int cur = 0; while (cur < batch->end) { unsigned long bytes = min(length, PAGE_SIZE - offset); copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data, offset, bytes, flags); offset = 0; length -= bytes; data += bytes; copied += bytes; npage++; if (npage == batch->npfns[cur]) { npage = 0; cur++; } if (!length) break; } return copied; } /* pfn_reader_user is just the pin_user_pages() path */ struct pfn_reader_user { struct page **upages; size_t upages_len; unsigned long upages_start; unsigned long upages_end; unsigned int gup_flags; /* * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is * neither */ int locked; /* The following are only valid if file != NULL. */ struct file *file; struct folio **ufolios; size_t ufolios_len; unsigned long ufolios_offset; struct folio **ufolios_next; }; static void pfn_reader_user_init(struct pfn_reader_user *user, struct iopt_pages *pages) { user->upages = NULL; user->upages_len = 0; user->upages_start = 0; user->upages_end = 0; user->locked = -1; user->gup_flags = FOLL_LONGTERM; if (pages->writable) user->gup_flags |= FOLL_WRITE; user->file = (pages->type == IOPT_ADDRESS_FILE) ? pages->file : NULL; user->ufolios = NULL; user->ufolios_len = 0; user->ufolios_next = NULL; user->ufolios_offset = 0; } static void pfn_reader_user_destroy(struct pfn_reader_user *user, struct iopt_pages *pages) { if (user->locked != -1) { if (user->locked) mmap_read_unlock(pages->source_mm); if (!user->file && pages->source_mm != current->mm) mmput(pages->source_mm); user->locked = -1; } kfree(user->upages); user->upages = NULL; kfree(user->ufolios); user->ufolios = NULL; } static long pin_memfd_pages(struct pfn_reader_user *user, unsigned long start, unsigned long npages) { unsigned long i; unsigned long offset; unsigned long npages_out = 0; struct page **upages = user->upages; unsigned long end = start + (npages << PAGE_SHIFT) - 1; long nfolios = user->ufolios_len / sizeof(*user->ufolios); /* * todo: memfd_pin_folios should return the last pinned offset so * we can compute npages pinned, and avoid looping over folios here * if upages == NULL. */ nfolios = memfd_pin_folios(user->file, start, end, user->ufolios, nfolios, &offset); if (nfolios <= 0) return nfolios; offset >>= PAGE_SHIFT; user->ufolios_next = user->ufolios; user->ufolios_offset = offset; for (i = 0; i < nfolios; i++) { struct folio *folio = user->ufolios[i]; unsigned long nr = folio_nr_pages(folio); unsigned long npin = min(nr - offset, npages); npages -= npin; npages_out += npin; if (upages) { if (npin == 1) { *upages++ = folio_page(folio, offset); } else { int rc = folio_add_pins(folio, npin - 1); if (rc) return rc; while (npin--) *upages++ = folio_page(folio, offset++); } } offset = 0; } return npages_out; } static int pfn_reader_user_pin(struct pfn_reader_user *user, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { bool remote_mm = pages->source_mm != current->mm; unsigned long npages = last_index - start_index + 1; unsigned long start; unsigned long unum; uintptr_t uptr; long rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(last_index < start_index)) return -EINVAL; if (!user->file && !user->upages) { /* All undone in pfn_reader_destroy() */ user->upages_len = npages * sizeof(*user->upages); user->upages = temp_kmalloc(&user->upages_len, NULL, 0); if (!user->upages) return -ENOMEM; } if (user->file && !user->ufolios) { user->ufolios_len = npages * sizeof(*user->ufolios); user->ufolios = temp_kmalloc(&user->ufolios_len, NULL, 0); if (!user->ufolios) return -ENOMEM; } if (user->locked == -1) { /* * The majority of usages will run the map task within the mm * providing the pages, so we can optimize into * get_user_pages_fast() */ if (!user->file && remote_mm) { if (!mmget_not_zero(pages->source_mm)) return -EFAULT; } user->locked = 0; } unum = user->file ? user->ufolios_len / sizeof(*user->ufolios) : user->upages_len / sizeof(*user->upages); npages = min_t(unsigned long, npages, unum); if (iommufd_should_fail()) return -EFAULT; if (user->file) { start = pages->start + (start_index * PAGE_SIZE); rc = pin_memfd_pages(user, start, npages); } else if (!remote_mm) { uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); rc = pin_user_pages_fast(uptr, npages, user->gup_flags, user->upages); } else { uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); if (!user->locked) { mmap_read_lock(pages->source_mm); user->locked = 1; } rc = pin_user_pages_remote(pages->source_mm, uptr, npages, user->gup_flags, user->upages, &user->locked); } if (rc <= 0) { if (WARN_ON(!rc)) return -EFAULT; return rc; } iopt_pages_add_npinned(pages, rc); user->upages_start = start_index; user->upages_end = start_index + rc; return 0; } /* This is the "modern" and faster accounting method used by io_uring */ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) { unsigned long lock_limit; unsigned long cur_pages; unsigned long new_pages; lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; cur_pages = atomic_long_read(&pages->source_user->locked_vm); do { new_pages = cur_pages + npages; if (new_pages > lock_limit) return -ENOMEM; } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm, &cur_pages, new_pages)); return 0; } static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) { if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages)) return; atomic_long_sub(npages, &pages->source_user->locked_vm); } /* This is the accounting method used for compatibility with VFIO */ static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages, bool inc, struct pfn_reader_user *user) { bool do_put = false; int rc; if (user && user->locked) { mmap_read_unlock(pages->source_mm); user->locked = 0; /* If we had the lock then we also have a get */ } else if ((!user || (!user->upages && !user->ufolios)) && pages->source_mm != current->mm) { if (!mmget_not_zero(pages->source_mm)) return -EINVAL; do_put = true; } mmap_write_lock(pages->source_mm); rc = __account_locked_vm(pages->source_mm, npages, inc, pages->source_task, false); mmap_write_unlock(pages->source_mm); if (do_put) mmput(pages->source_mm); return rc; } int iopt_pages_update_pinned(struct iopt_pages *pages, unsigned long npages, bool inc, struct pfn_reader_user *user) { int rc = 0; switch (pages->account_mode) { case IOPT_PAGES_ACCOUNT_NONE: break; case IOPT_PAGES_ACCOUNT_USER: if (inc) rc = incr_user_locked_vm(pages, npages); else decr_user_locked_vm(pages, npages); break; case IOPT_PAGES_ACCOUNT_MM: rc = update_mm_locked_vm(pages, npages, inc, user); break; } if (rc) return rc; pages->last_npinned = pages->npinned; if (inc) atomic64_add(npages, &pages->source_mm->pinned_vm); else atomic64_sub(npages, &pages->source_mm->pinned_vm); return 0; } static void update_unpinned(struct iopt_pages *pages) { if (WARN_ON(pages->npinned > pages->last_npinned)) return; if (pages->npinned == pages->last_npinned) return; iopt_pages_update_pinned(pages, pages->last_npinned - pages->npinned, false, NULL); } /* * Changes in the number of pages pinned is done after the pages have been read * and processed. If the user lacked the limit then the error unwind will unpin * everything that was just pinned. This is because it is expensive to calculate * how many pages we have already pinned within a range to generate an accurate * prediction in advance of doing the work to actually pin them. */ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user, struct iopt_pages *pages) { unsigned long npages; bool inc; lockdep_assert_held(&pages->mutex); if (pages->npinned == pages->last_npinned) return 0; if (pages->npinned < pages->last_npinned) { npages = pages->last_npinned - pages->npinned; inc = false; } else { if (iommufd_should_fail()) return -ENOMEM; npages = pages->npinned - pages->last_npinned; inc = true; } return iopt_pages_update_pinned(pages, npages, inc, user); } /* * PFNs are stored in three places, in order of preference: * - The iopt_pages xarray. This is only populated if there is a * iopt_pages_access * - The iommu_domain under an area * - The original PFN source, ie pages->source_mm * * This iterator reads the pfns optimizing to load according to the * above order. */ struct pfn_reader { struct iopt_pages *pages; struct interval_tree_double_span_iter span; struct pfn_batch batch; unsigned long batch_start_index; unsigned long batch_end_index; unsigned long last_index; struct pfn_reader_user user; }; static int pfn_reader_update_pinned(struct pfn_reader *pfns) { return pfn_reader_user_update_pinned(&pfns->user, pfns->pages); } /* * The batch can contain a mixture of pages that are still in use and pages that * need to be unpinned. Unpin only pages that are not held anywhere else. */ static void pfn_reader_unpin(struct pfn_reader *pfns) { unsigned long last = pfns->batch_end_index - 1; unsigned long start = pfns->batch_start_index; struct interval_tree_double_span_iter span; struct iopt_pages *pages = pfns->pages; lockdep_assert_held(&pages->mutex); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start, last) { if (span.is_used) continue; batch_unpin(&pfns->batch, pages, span.start_hole - start, span.last_hole - span.start_hole + 1); } } /* Process a single span to load it from the proper storage */ static int pfn_reader_fill_span(struct pfn_reader *pfns) { struct interval_tree_double_span_iter *span = &pfns->span; unsigned long start_index = pfns->batch_end_index; struct pfn_reader_user *user = &pfns->user; unsigned long npages; struct iopt_area *area; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(span->last_used < start_index)) return -EINVAL; if (span->is_used == 1) { batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns, start_index, span->last_used); return 0; } if (span->is_used == 2) { /* * Pull as many pages from the first domain we find in the * target span. If it is too small then we will be called again * and we'll find another area. */ area = iopt_pages_find_domain_area(pfns->pages, start_index); if (WARN_ON(!area)) return -EINVAL; /* The storage_domain cannot change without the pages mutex */ batch_from_domain( &pfns->batch, area->storage_domain, area, start_index, min(iopt_area_last_index(area), span->last_used)); return 0; } if (start_index >= pfns->user.upages_end) { rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index, span->last_hole); if (rc) return rc; } npages = user->upages_end - start_index; start_index -= user->upages_start; rc = 0; if (!user->file) batch_from_pages(&pfns->batch, user->upages + start_index, npages); else rc = batch_from_folios(&pfns->batch, &user->ufolios_next, &user->ufolios_offset, npages); return rc; } static bool pfn_reader_done(struct pfn_reader *pfns) { return pfns->batch_start_index == pfns->last_index + 1; } static int pfn_reader_next(struct pfn_reader *pfns) { int rc; batch_clear(&pfns->batch); pfns->batch_start_index = pfns->batch_end_index; while (pfns->batch_end_index != pfns->last_index + 1) { unsigned int npfns = pfns->batch.total_pfns; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(interval_tree_double_span_iter_done(&pfns->span))) return -EINVAL; rc = pfn_reader_fill_span(pfns); if (rc) return rc; if (WARN_ON(!pfns->batch.total_pfns)) return -EINVAL; pfns->batch_end_index = pfns->batch_start_index + pfns->batch.total_pfns; if (pfns->batch_end_index == pfns->span.last_used + 1) interval_tree_double_span_iter_next(&pfns->span); /* Batch is full */ if (npfns == pfns->batch.total_pfns) return 0; } return 0; } static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { int rc; lockdep_assert_held(&pages->mutex); pfns->pages = pages; pfns->batch_start_index = start_index; pfns->batch_end_index = start_index; pfns->last_index = last_index; pfn_reader_user_init(&pfns->user, pages); rc = batch_init(&pfns->batch, last_index - start_index + 1); if (rc) return rc; interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree, &pages->domains_itree, start_index, last_index); return 0; } /* * There are many assertions regarding the state of pages->npinned vs * pages->last_pinned, for instance something like unmapping a domain must only * decrement the npinned, and pfn_reader_destroy() must be called only after all * the pins are updated. This is fine for success flows, but error flows * sometimes need to release the pins held inside the pfn_reader before going on * to complete unmapping and releasing pins held in domains. */ static void pfn_reader_release_pins(struct pfn_reader *pfns) { struct iopt_pages *pages = pfns->pages; struct pfn_reader_user *user = &pfns->user; if (user->upages_end > pfns->batch_end_index) { /* Any pages not transferred to the batch are just unpinned */ unsigned long npages = user->upages_end - pfns->batch_end_index; unsigned long start_index = pfns->batch_end_index - user->upages_start; if (!user->file) { unpin_user_pages(user->upages + start_index, npages); } else { long n = user->ufolios_len / sizeof(*user->ufolios); unpin_folios(user->ufolios_next, user->ufolios + n - user->ufolios_next); } iopt_pages_sub_npinned(pages, npages); user->upages_end = pfns->batch_end_index; } if (pfns->batch_start_index != pfns->batch_end_index) { pfn_reader_unpin(pfns); pfns->batch_start_index = pfns->batch_end_index; } } static void pfn_reader_destroy(struct pfn_reader *pfns) { struct iopt_pages *pages = pfns->pages; pfn_reader_release_pins(pfns); pfn_reader_user_destroy(&pfns->user, pfns->pages); batch_destroy(&pfns->batch, NULL); WARN_ON(pages->last_npinned != pages->npinned); } static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(last_index < start_index)) return -EINVAL; rc = pfn_reader_init(pfns, pages, start_index, last_index); if (rc) return rc; rc = pfn_reader_next(pfns); if (rc) { pfn_reader_destroy(pfns); return rc; } return 0; } static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte, unsigned long length, bool writable) { struct iopt_pages *pages; /* * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP * below from overflow */ if (length > SIZE_MAX - PAGE_SIZE || length == 0) return ERR_PTR(-EINVAL); pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); if (!pages) return ERR_PTR(-ENOMEM); kref_init(&pages->kref); xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT); mutex_init(&pages->mutex); pages->source_mm = current->mm; mmgrab(pages->source_mm); pages->npages = DIV_ROUND_UP(length + start_byte, PAGE_SIZE); pages->access_itree = RB_ROOT_CACHED; pages->domains_itree = RB_ROOT_CACHED; pages->writable = writable; if (capable(CAP_IPC_LOCK)) pages->account_mode = IOPT_PAGES_ACCOUNT_NONE; else pages->account_mode = IOPT_PAGES_ACCOUNT_USER; pages->source_task = current->group_leader; get_task_struct(current->group_leader); pages->source_user = get_uid(current_user()); return pages; } struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, unsigned long length, bool writable) { struct iopt_pages *pages; unsigned long end; void __user *uptr_down = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); if (check_add_overflow((unsigned long)uptr, length, &end)) return ERR_PTR(-EOVERFLOW); pages = iopt_alloc_pages(uptr - uptr_down, length, writable); if (IS_ERR(pages)) return pages; pages->uptr = uptr_down; pages->type = IOPT_ADDRESS_USER; return pages; } struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, unsigned long length, bool writable) { struct iopt_pages *pages; unsigned long start_down = ALIGN_DOWN(start, PAGE_SIZE); unsigned long end; if (length && check_add_overflow(start, length - 1, &end)) return ERR_PTR(-EOVERFLOW); pages = iopt_alloc_pages(start - start_down, length, writable); if (IS_ERR(pages)) return pages; pages->file = get_file(file); pages->start = start_down; pages->type = IOPT_ADDRESS_FILE; return pages; } void iopt_release_pages(struct kref *kref) { struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref); WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root)); WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root)); WARN_ON(pages->npinned); WARN_ON(!xa_empty(&pages->pinned_pfns)); mmdrop(pages->source_mm); mutex_destroy(&pages->mutex); put_task_struct(pages->source_task); free_uid(pages->source_user); if (pages->type == IOPT_ADDRESS_FILE) fput(pages->file); kfree(pages); } static void iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long start_index, unsigned long last_index, unsigned long *unmapped_end_index, unsigned long real_last_index) { while (start_index <= last_index) { unsigned long batch_last_index; if (*unmapped_end_index <= last_index) { unsigned long start = max(start_index, *unmapped_end_index); if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && batch->total_pfns) WARN_ON(*unmapped_end_index - batch->total_pfns != start_index); batch_from_domain(batch, domain, area, start, last_index); batch_last_index = start_index + batch->total_pfns - 1; } else { batch_last_index = last_index; } if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(batch_last_index > real_last_index); /* * unmaps must always 'cut' at a place where the pfns are not * contiguous to pair with the maps that always install * contiguous pages. Thus, if we have to stop unpinning in the * middle of the domains we need to keep reading pfns until we * find a cut point to do the unmap. The pfns we read are * carried over and either skipped or integrated into the next * batch. */ if (batch_last_index == last_index && last_index != real_last_index) batch_from_domain_continue(batch, domain, area, last_index + 1, real_last_index); if (*unmapped_end_index <= batch_last_index) { iopt_area_unmap_domain_range( area, domain, *unmapped_end_index, start_index + batch->total_pfns - 1); *unmapped_end_index = start_index + batch->total_pfns; } /* unpin must follow unmap */ batch_unpin(batch, pages, 0, batch_last_index - start_index + 1); start_index = batch_last_index + 1; batch_clear_carry(batch, *unmapped_end_index - batch_last_index - 1); } } static void __iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long last_index) { struct interval_tree_double_span_iter span; unsigned long start_index = iopt_area_index(area); unsigned long unmapped_end_index = start_index; u64 backup[BATCH_BACKUP_SIZE]; struct pfn_batch batch; lockdep_assert_held(&pages->mutex); /* * For security we must not unpin something that is still DMA mapped, * so this must unmap any IOVA before we go ahead and unpin the pages. * This creates a complexity where we need to skip over unpinning pages * held in the xarray, but continue to unmap from the domain. * * The domain unmap cannot stop in the middle of a contiguous range of * PFNs. To solve this problem the unpinning step will read ahead to the * end of any contiguous span, unmap that whole span, and then only * unpin the leading part that does not have any accesses. The residual * PFNs that were unmapped but not unpinned are called a "carry" in the * batch as they are moved to the front of the PFN list and continue on * to the next iteration(s). */ batch_init_backup(&batch, last_index + 1, backup, sizeof(backup)); interval_tree_for_each_double_span(&span, &pages->domains_itree, &pages->access_itree, start_index, last_index) { if (span.is_used) { batch_skip_carry(&batch, span.last_used - span.start_used + 1); continue; } iopt_area_unpin_domain(&batch, area, pages, domain, span.start_hole, span.last_hole, &unmapped_end_index, last_index); } /* * If the range ends in a access then we do the residual unmap without * any unpins. */ if (unmapped_end_index != last_index + 1) iopt_area_unmap_domain_range(area, domain, unmapped_end_index, last_index); WARN_ON(batch.total_pfns); batch_destroy(&batch, backup); update_unpinned(pages); } static void iopt_area_unfill_partial_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain, unsigned long end_index) { if (end_index != iopt_area_index(area)) __iopt_area_unfill_domain(area, pages, domain, end_index - 1); } /** * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain * @area: The IOVA range to unmap * @domain: The domain to unmap * * The caller must know that unpinning is not required, usually because there * are other domains in the iopt. */ void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain) { iommu_unmap_nofail(domain, iopt_area_iova(area), iopt_area_length(area)); } /** * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain * @area: IOVA area to use * @pages: page supplier for the area (area->pages is NULL) * @domain: Domain to unmap from * * The domain should be removed from the domains_itree before calling. The * domain will always be unmapped, but the PFNs may not be unpinned if there are * still accesses. */ void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain) { __iopt_area_unfill_domain(area, pages, domain, iopt_area_last_index(area)); } /** * iopt_area_fill_domain() - Map PFNs from the area into a domain * @area: IOVA area to use * @domain: Domain to load PFNs into * * Read the pfns from the area's underlying iopt_pages and map them into the * given domain. Called when attaching a new domain to an io_pagetable. */ int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain) { unsigned long done_end_index; struct pfn_reader pfns; int rc; lockdep_assert_held(&area->pages->mutex); rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area), iopt_area_last_index(area)); if (rc) return rc; while (!pfn_reader_done(&pfns)) { done_end_index = pfns.batch_start_index; rc = batch_to_domain(&pfns.batch, domain, area, pfns.batch_start_index); if (rc) goto out_unmap; done_end_index = pfns.batch_end_index; rc = pfn_reader_next(&pfns); if (rc) goto out_unmap; } rc = pfn_reader_update_pinned(&pfns); if (rc) goto out_unmap; goto out_destroy; out_unmap: pfn_reader_release_pins(&pfns); iopt_area_unfill_partial_domain(area, area->pages, domain, done_end_index); out_destroy: pfn_reader_destroy(&pfns); return rc; } /** * iopt_area_fill_domains() - Install PFNs into the area's domains * @area: The area to act on * @pages: The pages associated with the area (area->pages is NULL) * * Called during area creation. The area is freshly created and not inserted in * the domains_itree yet. PFNs are read and loaded into every domain held in the * area's io_pagetable and the area is installed in the domains_itree. * * On failure all domains are left unchanged. */ int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages) { unsigned long done_first_end_index; unsigned long done_all_end_index; struct iommu_domain *domain; unsigned long unmap_index; struct pfn_reader pfns; unsigned long index; int rc; lockdep_assert_held(&area->iopt->domains_rwsem); if (xa_empty(&area->iopt->domains)) return 0; mutex_lock(&pages->mutex); rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), iopt_area_last_index(area)); if (rc) goto out_unlock; while (!pfn_reader_done(&pfns)) { done_first_end_index = pfns.batch_end_index; done_all_end_index = pfns.batch_start_index; xa_for_each(&area->iopt->domains, index, domain) { rc = batch_to_domain(&pfns.batch, domain, area, pfns.batch_start_index); if (rc) goto out_unmap; } done_all_end_index = done_first_end_index; rc = pfn_reader_next(&pfns); if (rc) goto out_unmap; } rc = pfn_reader_update_pinned(&pfns); if (rc) goto out_unmap; area->storage_domain = xa_load(&area->iopt->domains, 0); interval_tree_insert(&area->pages_node, &pages->domains_itree); goto out_destroy; out_unmap: pfn_reader_release_pins(&pfns); xa_for_each(&area->iopt->domains, unmap_index, domain) { unsigned long end_index; if (unmap_index < index) end_index = done_first_end_index; else end_index = done_all_end_index; /* * The area is not yet part of the domains_itree so we have to * manage the unpinning specially. The last domain does the * unpin, every other domain is just unmapped. */ if (unmap_index != area->iopt->next_domain_id - 1) { if (end_index != iopt_area_index(area)) iopt_area_unmap_domain_range( area, domain, iopt_area_index(area), end_index - 1); } else { iopt_area_unfill_partial_domain(area, pages, domain, end_index); } } out_destroy: pfn_reader_destroy(&pfns); out_unlock: mutex_unlock(&pages->mutex); return rc; } /** * iopt_area_unfill_domains() - unmap PFNs from the area's domains * @area: The area to act on * @pages: The pages associated with the area (area->pages is NULL) * * Called during area destruction. This unmaps the iova's covered by all the * area's domains and releases the PFNs. */ void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages) { struct io_pagetable *iopt = area->iopt; struct iommu_domain *domain; unsigned long index; lockdep_assert_held(&iopt->domains_rwsem); mutex_lock(&pages->mutex); if (!area->storage_domain) goto out_unlock; xa_for_each(&iopt->domains, index, domain) if (domain != area->storage_domain) iopt_area_unmap_domain_range( area, domain, iopt_area_index(area), iopt_area_last_index(area)); if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(RB_EMPTY_NODE(&area->pages_node.rb)); interval_tree_remove(&area->pages_node, &pages->domains_itree); iopt_area_unfill_domain(area, pages, area->storage_domain); area->storage_domain = NULL; out_unlock: mutex_unlock(&pages->mutex); } static void iopt_pages_unpin_xarray(struct pfn_batch *batch, struct iopt_pages *pages, unsigned long start_index, unsigned long end_index) { while (start_index <= end_index) { batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index, end_index); batch_unpin(batch, pages, 0, batch->total_pfns); start_index += batch->total_pfns; batch_clear(batch); } } /** * iopt_pages_unfill_xarray() - Update the xarry after removing an access * @pages: The pages to act on * @start_index: Starting PFN index * @last_index: Last PFN index * * Called when an iopt_pages_access is removed, removes pages from the itree. * The access should already be removed from the access_itree. */ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index) { struct interval_tree_double_span_iter span; u64 backup[BATCH_BACKUP_SIZE]; struct pfn_batch batch; bool batch_inited = false; lockdep_assert_held(&pages->mutex); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start_index, last_index) { if (!span.is_used) { if (!batch_inited) { batch_init_backup(&batch, last_index - start_index + 1, backup, sizeof(backup)); batch_inited = true; } iopt_pages_unpin_xarray(&batch, pages, span.start_hole, span.last_hole); } else if (span.is_used == 2) { /* Covered by a domain */ clear_xarray(&pages->pinned_pfns, span.start_used, span.last_used); } /* Otherwise covered by an existing access */ } if (batch_inited) batch_destroy(&batch, backup); update_unpinned(pages); } /** * iopt_pages_fill_from_xarray() - Fast path for reading PFNs * @pages: The pages to act on * @start_index: The first page index in the range * @last_index: The last page index in the range * @out_pages: The output array to return the pages * * This can be called if the caller is holding a refcount on an * iopt_pages_access that is known to have already been filled. It quickly reads * the pages directly from the xarray. * * This is part of the SW iommu interface to read pages for in-kernel use. */ void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { XA_STATE(xas, &pages->pinned_pfns, start_index); void *entry; rcu_read_lock(); while (start_index <= last_index) { entry = xas_next(&xas); if (xas_retry(&xas, entry)) continue; WARN_ON(!xa_is_value(entry)); *(out_pages++) = pfn_to_page(xa_to_value(entry)); start_index++; } rcu_read_unlock(); } static int iopt_pages_fill_from_domain(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { while (start_index != last_index + 1) { unsigned long domain_last; struct iopt_area *area; area = iopt_pages_find_domain_area(pages, start_index); if (WARN_ON(!area)) return -EINVAL; domain_last = min(iopt_area_last_index(area), last_index); out_pages = raw_pages_from_domain(area->storage_domain, area, start_index, domain_last, out_pages); start_index = domain_last + 1; } return 0; } static int iopt_pages_fill(struct iopt_pages *pages, struct pfn_reader_user *user, unsigned long start_index, unsigned long last_index, struct page **out_pages) { unsigned long cur_index = start_index; int rc; while (cur_index != last_index + 1) { user->upages = out_pages + (cur_index - start_index); rc = pfn_reader_user_pin(user, pages, cur_index, last_index); if (rc) goto out_unpin; cur_index = user->upages_end; } return 0; out_unpin: if (start_index != cur_index) iopt_pages_err_unpin(pages, start_index, cur_index - 1, out_pages); return rc; } /** * iopt_pages_fill_xarray() - Read PFNs * @pages: The pages to act on * @start_index: The first page index in the range * @last_index: The last page index in the range * @out_pages: The output array to return the pages, may be NULL * * This populates the xarray and returns the pages in out_pages. As the slow * path this is able to copy pages from other storage tiers into the xarray. * * On failure the xarray is left unchanged. * * This is part of the SW iommu interface to read pages for in-kernel use. */ int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, struct page **out_pages) { struct interval_tree_double_span_iter span; unsigned long xa_end = start_index; struct pfn_reader_user user; int rc; lockdep_assert_held(&pages->mutex); pfn_reader_user_init(&user, pages); user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages); interval_tree_for_each_double_span(&span, &pages->access_itree, &pages->domains_itree, start_index, last_index) { struct page **cur_pages; if (span.is_used == 1) { cur_pages = out_pages + (span.start_used - start_index); iopt_pages_fill_from_xarray(pages, span.start_used, span.last_used, cur_pages); continue; } if (span.is_used == 2) { cur_pages = out_pages + (span.start_used - start_index); iopt_pages_fill_from_domain(pages, span.start_used, span.last_used, cur_pages); rc = pages_to_xarray(&pages->pinned_pfns, span.start_used, span.last_used, cur_pages); if (rc) goto out_clean_xa; xa_end = span.last_used + 1; continue; } /* hole */ cur_pages = out_pages + (span.start_hole - start_index); rc = iopt_pages_fill(pages, &user, span.start_hole, span.last_hole, cur_pages); if (rc) goto out_clean_xa; rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole, span.last_hole, cur_pages); if (rc) { iopt_pages_err_unpin(pages, span.start_hole, span.last_hole, cur_pages); goto out_clean_xa; } xa_end = span.last_hole + 1; } rc = pfn_reader_user_update_pinned(&user, pages); if (rc) goto out_clean_xa; user.upages = NULL; pfn_reader_user_destroy(&user, pages); return 0; out_clean_xa: if (start_index != xa_end) iopt_pages_unfill_xarray(pages, start_index, xa_end - 1); user.upages = NULL; pfn_reader_user_destroy(&user, pages); return rc; } /* * This uses the pfn_reader instead of taking a shortcut by using the mm. It can * do every scenario and is fully consistent with what an iommu_domain would * see. */ static int iopt_pages_rw_slow(struct iopt_pages *pages, unsigned long start_index, unsigned long last_index, unsigned long offset, void *data, unsigned long length, unsigned int flags) { struct pfn_reader pfns; int rc; mutex_lock(&pages->mutex); rc = pfn_reader_first(&pfns, pages, start_index, last_index); if (rc) goto out_unlock; while (!pfn_reader_done(&pfns)) { unsigned long done; done = batch_rw(&pfns.batch, data, offset, length, flags); data += done; length -= done; offset = 0; pfn_reader_unpin(&pfns); rc = pfn_reader_next(&pfns); if (rc) goto out_destroy; } if (WARN_ON(length != 0)) rc = -EINVAL; out_destroy: pfn_reader_destroy(&pfns); out_unlock: mutex_unlock(&pages->mutex); return rc; } /* * A medium speed path that still allows DMA inconsistencies, but doesn't do any * memory allocations or interval tree searches. */ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index, unsigned long offset, void *data, unsigned long length, unsigned int flags) { struct page *page = NULL; int rc; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(pages->type != IOPT_ADDRESS_USER)) return -EINVAL; if (!mmget_not_zero(pages->source_mm)) return iopt_pages_rw_slow(pages, index, index, offset, data, length, flags); if (iommufd_should_fail()) { rc = -EINVAL; goto out_mmput; } mmap_read_lock(pages->source_mm); rc = pin_user_pages_remote( pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE), 1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page, NULL); mmap_read_unlock(pages->source_mm); if (rc != 1) { if (WARN_ON(rc >= 0)) rc = -EINVAL; goto out_mmput; } copy_data_page(page, data, offset, length, flags); unpin_user_page(page); rc = 0; out_mmput: mmput(pages->source_mm); return rc; } /** * iopt_pages_rw_access - Copy to/from a linear slice of the pages * @pages: pages to act on * @start_byte: First byte of pages to copy to/from * @data: Kernel buffer to get/put the data * @length: Number of bytes to copy * @flags: IOMMUFD_ACCESS_RW_* flags * * This will find each page in the range, kmap it and then memcpy to/from * the given kernel buffer. */ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, void *data, unsigned long length, unsigned int flags) { unsigned long start_index = start_byte / PAGE_SIZE; unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE; bool change_mm = current->mm != pages->source_mm; int rc = 0; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH)) change_mm = true; if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) return -EPERM; if (pages->type == IOPT_ADDRESS_FILE) return iopt_pages_rw_slow(pages, start_index, last_index, start_byte % PAGE_SIZE, data, length, flags); if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(pages->type != IOPT_ADDRESS_USER)) return -EINVAL; if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) { if (start_index == last_index) return iopt_pages_rw_page(pages, start_index, start_byte % PAGE_SIZE, data, length, flags); return iopt_pages_rw_slow(pages, start_index, last_index, start_byte % PAGE_SIZE, data, length, flags); } /* * Try to copy using copy_to_user(). We do this as a fast path and * ignore any pinning inconsistencies, unlike a real DMA path. */ if (change_mm) { if (!mmget_not_zero(pages->source_mm)) return iopt_pages_rw_slow(pages, start_index, last_index, start_byte % PAGE_SIZE, data, length, flags); kthread_use_mm(pages->source_mm); } if (flags & IOMMUFD_ACCESS_RW_WRITE) { if (copy_to_user(pages->uptr + start_byte, data, length)) rc = -EFAULT; } else { if (copy_from_user(data, pages->uptr + start_byte, length)) rc = -EFAULT; } if (change_mm) { kthread_unuse_mm(pages->source_mm); mmput(pages->source_mm); } return rc; } static struct iopt_pages_access * iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, unsigned long last) { struct interval_tree_node *node; lockdep_assert_held(&pages->mutex); /* There can be overlapping ranges in this interval tree */ for (node = interval_tree_iter_first(&pages->access_itree, index, last); node; node = interval_tree_iter_next(node, index, last)) if (node->start == index && node->last == last) return container_of(node, struct iopt_pages_access, node); return NULL; } /** * iopt_area_add_access() - Record an in-knerel access for PFNs * @area: The source of PFNs * @start_index: First page index * @last_index: Inclusive last page index * @out_pages: Output list of struct page's representing the PFNs * @flags: IOMMUFD_ACCESS_RW_* flags * @lock_area: Fail userspace munmap on this area * * Record that an in-kernel access will be accessing the pages, ensure they are * pinned, and return the PFNs as a simple list of 'struct page *'. * * This should be undone through a matching call to iopt_area_remove_access() */ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, unsigned long last_index, struct page **out_pages, unsigned int flags, bool lock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; int rc; if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) return -EPERM; mutex_lock(&pages->mutex); access = iopt_pages_get_exact_access(pages, start_index, last_index); if (access) { area->num_accesses++; if (lock_area) area->num_locks++; access->users++; iopt_pages_fill_from_xarray(pages, start_index, last_index, out_pages); mutex_unlock(&pages->mutex); return 0; } access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT); if (!access) { rc = -ENOMEM; goto err_unlock; } rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages); if (rc) goto err_free; access->node.start = start_index; access->node.last = last_index; access->users = 1; area->num_accesses++; if (lock_area) area->num_locks++; interval_tree_insert(&access->node, &pages->access_itree); mutex_unlock(&pages->mutex); return 0; err_free: kfree(access); err_unlock: mutex_unlock(&pages->mutex); return rc; } /** * iopt_area_remove_access() - Release an in-kernel access for PFNs * @area: The source of PFNs * @start_index: First page index * @last_index: Inclusive last page index * @unlock_area: Must match the matching iopt_area_add_access()'s lock_area * * Undo iopt_area_add_access() and unpin the pages if necessary. The caller * must stop using the PFNs before calling this. */ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, unsigned long last_index, bool unlock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; mutex_lock(&pages->mutex); access = iopt_pages_get_exact_access(pages, start_index, last_index); if (WARN_ON(!access)) goto out_unlock; WARN_ON(area->num_accesses == 0 || access->users == 0); if (unlock_area) { WARN_ON(area->num_locks == 0); area->num_locks--; } area->num_accesses--; access->users--; if (access->users) goto out_unlock; interval_tree_remove(&access->node, &pages->access_itree); iopt_pages_unfill_xarray(pages, start_index, last_index); kfree(access); out_unlock: mutex_unlock(&pages->mutex); } |
| 89 5 964 67 67 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2018-2025 Intel Corporation */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include <linux/rfkill.h> #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <net/genetlink.h> #include <net/cfg80211.h> #include "reg.h" #define WIPHY_IDX_INVALID -1 struct cfg80211_scan_request_int { struct cfg80211_scan_info info; bool notified; /* must be last - variable members */ struct cfg80211_scan_request req; }; struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; /* rfkill support */ struct rfkill_ops rfkill_ops; struct work_struct rfkill_block; /* ISO / IEC 3166 alpha2 for which this device is receiving * country IEs on, this can help disregard country IEs from APs * on the same alpha2 quickly. The alpha2 may differ from * cfg80211_regdomain's alpha2 when an intersection has occurred. * If the AP is reconfigured this can also be used to tell us if * the country on the country IE changed. */ char country_ie_alpha2[2]; /* * the driver requests the regulatory core to set this regulatory * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED * devices using the regulatory_set_wiphy_regd() API */ const struct ieee80211_regdomain *requested_regd; /* If a Country IE has been received this tells us the environment * which its telling us its in. This defaults to ENVIRON_ANY */ enum environment_cap env; /* wiphy index, internal only */ int wiphy_idx; /* protected by RTNL */ int devlist_generation, wdev_id; int opencount; wait_queue_head_t dev_wait; struct list_head beacon_registrations; spinlock_t beacon_registrations_lock; /* protected by RTNL only */ int num_running_ifaces; int num_running_monitor_ifaces; u64 cookie_counter; /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; u32 bss_entries; struct cfg80211_scan_request_int *scan_req; /* protected by RTNL */ struct cfg80211_scan_request_int *int_scan_req; struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; struct wiphy_work scan_done_wk; struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; struct delayed_work dfs_update_channels_wk; struct wireless_dev *background_radar_wdev; struct cfg80211_chan_def background_radar_chandef; struct delayed_work background_cac_done_wk; struct work_struct background_cac_abort_wk; /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; struct cfg80211_coalesce *coalesce; struct work_struct destroy_work; struct wiphy_work sched_scan_stop_wk; struct work_struct sched_scan_res_wk; struct cfg80211_chan_def radar_chandef; struct work_struct propagate_radar_detect_wk; struct cfg80211_chan_def cac_done_chandef; struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; /* lock for all wdev lists */ spinlock_t mgmt_registrations_lock; struct work_struct wiphy_work; struct list_head wiphy_work_list; /* protects the list above */ spinlock_t wiphy_work_lock; bool suspended; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); } static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { #ifdef CONFIG_PM int i; if (!rdev->wiphy.wowlan_config) return; for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) kfree(rdev->wiphy.wowlan_config->patterns[i].mask); kfree(rdev->wiphy.wowlan_config->patterns); if (rdev->wiphy.wowlan_config->tcp && rdev->wiphy.wowlan_config->tcp->sock) sock_release(rdev->wiphy.wowlan_config->tcp->sock); kfree(rdev->wiphy.wowlan_config->tcp); kfree(rdev->wiphy.wowlan_config->nd_config); kfree(rdev->wiphy.wowlan_config); #endif } static inline u64 cfg80211_assign_cookie(struct cfg80211_registered_device *rdev) { u64 r = ++rdev->cookie_counter; if (WARN_ON(r == 0)) r = ++rdev->cookie_counter; return r; } extern struct workqueue_struct *cfg80211_wq; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; /* This is constructed like this so it can be used in if/else */ static inline int for_each_rdev_check_rtnl(void) { ASSERT_RTNL(); return 0; } #define for_each_rdev(rdev) \ if (for_each_rdev_check_rtnl()) {} else \ list_for_each_entry(rdev, &cfg80211_rdev_list, list) enum bss_source_type { BSS_SOURCE_DIRECT = 0, BSS_SOURCE_MBSSID, BSS_SOURCE_STA_PROFILE, }; struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; unsigned long ts; unsigned long refcount; atomic_t hold; /* time at the start of the reception of the first octet of the * timestamp field of the last beacon/probe received for this BSS. * The time is the TSF of the BSS specified by %parent_bssid. */ u64 parent_tsf; /* the BSS according to which %parent_tsf is set. This is set to * the BSS that the interface that requested the scan was connected to * when the beacon/probe was received. */ u8 parent_bssid[ETH_ALEN] __aligned(2); enum bss_source_type bss_source; /* must be last because of priv member */ struct cfg80211_bss pub; }; static inline struct cfg80211_internal_bss *bss_from_pub(struct cfg80211_bss *pub) { return container_of(pub, struct cfg80211_internal_bss, pub); } static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss) { atomic_inc(&bss->hold); if (bss->pub.transmitted_bss) { bss = container_of(bss->pub.transmitted_bss, struct cfg80211_internal_bss, pub); atomic_inc(&bss->hold); } } static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) { int r = atomic_dec_return(&bss->hold); WARN_ON(r < 0); if (bss->pub.transmitted_bss) { bss = container_of(bss->pub.transmitted_bss, struct cfg80211_internal_bss, pub); r = atomic_dec_return(&bss->hold); WARN_ON(r < 0); } } struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); void cfg80211_init_wdev(struct wireless_dev *wdev); void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { lockdep_assert_held(&rdev->wiphy.mtx); return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && rdev->num_running_ifaces > 0; } enum cfg80211_event_type { EVENT_CONNECT_RESULT, EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { struct list_head list; enum cfg80211_event_type type; union { struct cfg80211_connect_resp_params cr; struct cfg80211_roam_info rm; struct { const u8 *ie; size_t ie_len; u16 reason; bool locally_generated; } dc; struct { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; struct { u8 peer_addr[ETH_ALEN]; const u8 *td_bitmap; u8 td_bitmap_len; } pa; }; }; struct cfg80211_cached_keys { struct key_params params[4]; u8 data[4][WLAN_KEY_LEN_WEP104]; int def; }; struct cfg80211_beacon_registration { struct list_head list; u32 nlportid; }; struct cfg80211_cqm_config { struct rcu_head rcu_head; u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work); void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, unsigned int link, struct ieee80211_channel *channel); /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel); int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); /* mesh */ extern const struct mesh_config default_mesh_config; extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); /* OCB */ int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); /* AP */ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link, bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_assoc_request *req, struct netlink_ext_ack *extack); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, bool local_state_change); int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *ap_addr, const u8 *ie, int ie_len, u16 reason, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len, bool multicast_rx, struct netlink_ext_ack *extack); void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); void __cfg80211_connect_result(struct net_device *dev, struct cfg80211_connect_resp_params *params, bool wextev); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); /* SME implementation */ void cfg80211_conn_work(struct work_struct *work); void cfg80211_sme_scan_done(struct net_device *dev); bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req); int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, bool want_multi); void cfg80211_sched_scan_results_wk(struct work_struct *work); int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req, bool driver_initiated); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, u64 reqid, bool driver_initiated); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, struct wiphy_work *end); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); int cfg80211_scan(struct cfg80211_registered_device *rdev); extern struct work_struct cfg80211_disconnect_work; #define NL80211_BSS_USE_FOR_ALL (NL80211_BSS_USE_FOR_NORMAL | \ NL80211_BSS_USE_FOR_MLD_LINK) void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); void cfg80211_dfs_channels_update_work(struct work_struct *work); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); int cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev); void cfg80211_background_cac_done_wk(struct work_struct *work); void cfg80211_background_cac_abort_wk(struct work_struct *work); bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, bool primary_only); bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, struct ieee80211_channel *chan, bool primary_only); bool _cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags, u32 permitting_flags); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; if (end >= start) return jiffies_to_msecs(end - start); return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_chan_def *chandef); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, bool signal_valid, unsigned long ts); enum ieee80211_ap_reg_power cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len); #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else /* * Trick to enable using it as a condition, * and also not give a warning when it's * not used that way. */ #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid); void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev); void cfg80211_pmsr_free_wk(struct work_struct *work); void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id); void cfg80211_remove_links(struct wireless_dev *wdev); int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ml_reconf_req *req); /** * struct cfg80211_colocated_ap - colocated AP information * * @list: linked list to all colocated APs * @bssid: BSSID of the reported AP * @ssid: SSID of the reported AP * @ssid_len: length of the ssid * @center_freq: frequency the reported AP is on * @unsolicited_probe: the reported AP is part of an ESS, where all the APs * that operate in the same channel as the reported AP and that might be * detected by a STA receiving this frame, are transmitting unsolicited * Probe Response frames every 20 TUs * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP * @same_ssid: the reported AP has the same SSID as the reporting AP * @multi_bss: the reported AP is part of a multiple BSSID set * @transmitted_bssid: the reported AP is the transmitting BSSID * @colocated_ess: all the APs that share the same ESS as the reported AP are * colocated and can be discovered via legacy bands. * @short_ssid_valid: short_ssid is valid and can be used * @short_ssid: the short SSID for this SSID * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP */ struct cfg80211_colocated_ap { struct list_head list; u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; size_t ssid_len; u32 short_ssid; u32 center_freq; u8 unsolicited_probe:1, oct_recommended:1, same_ssid:1, multi_bss:1, transmitted_bssid:1, colocated_ess:1, short_ssid_valid:1; s8 psd_20; }; #if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) #define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym) #define VISIBLE_IF_CFG80211_KUNIT void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list); int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, struct list_head *list); size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, const u8 *subie, size_t subie_len, u8 *new_ie, size_t new_ie_len); #else #define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) #define VISIBLE_IF_CFG80211_KUNIT static #endif /* IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) */ #endif /* __NET_WIRELESS_CORE_H */ |
| 8 1 3 6 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | // SPDX-License-Identifier: GPL-2.0-or-later /* * lib/ts_fsm.c A naive finite state machine text search approach * * Authors: Thomas Graf <tgraf@suug.ch> * * ========================================================================== * * A finite state machine consists of n states (struct ts_fsm_token) * representing the pattern as a finite automaton. The data is read * sequentially on an octet basis. Every state token specifies the number * of recurrences and the type of value accepted which can be either a * specific character or ctype based set of characters. The available * type of recurrences include 1, (0|1), [0 n], and [1 n]. * * The algorithm differs between strict/non-strict mode specifying * whether the pattern has to start at the first octet. Strict mode * is enabled by default and can be disabled by inserting * TS_FSM_HEAD_IGNORE as the first token in the chain. * * The runtime performance of the algorithm should be around O(n), * however while in strict mode the average runtime can be better. */ #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/textsearch.h> #include <linux/textsearch_fsm.h> struct ts_fsm { unsigned int ntokens; struct ts_fsm_token tokens[]; }; /* other values derived from ctype.h */ #define _A 0x100 /* ascii */ #define _W 0x200 /* wildcard */ /* Map to _ctype flags and some magic numbers */ static const u16 token_map[TS_FSM_TYPE_MAX+1] = { [TS_FSM_SPECIFIC] = 0, [TS_FSM_WILDCARD] = _W, [TS_FSM_CNTRL] = _C, [TS_FSM_LOWER] = _L, [TS_FSM_UPPER] = _U, [TS_FSM_PUNCT] = _P, [TS_FSM_SPACE] = _S, [TS_FSM_DIGIT] = _D, [TS_FSM_XDIGIT] = _D | _X, [TS_FSM_ALPHA] = _U | _L, [TS_FSM_ALNUM] = _U | _L | _D, [TS_FSM_PRINT] = _P | _U | _L | _D | _SP, [TS_FSM_GRAPH] = _P | _U | _L | _D, [TS_FSM_ASCII] = _A, }; static const u16 token_lookup_tbl[256] = { _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */ _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */ _W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */ _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */ _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */ _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */ _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */ _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */ _W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */ _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */ _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */ _W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */ _W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */ _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */ _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */ _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */ _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */ _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */ _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */ _W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */ _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */ _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */ _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */ _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */ _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */ _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */ _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */ _W, _W, _W, _W, /* 128-131 */ _W, _W, _W, _W, /* 132-135 */ _W, _W, _W, _W, /* 136-139 */ _W, _W, _W, _W, /* 140-143 */ _W, _W, _W, _W, /* 144-147 */ _W, _W, _W, _W, /* 148-151 */ _W, _W, _W, _W, /* 152-155 */ _W, _W, _W, _W, /* 156-159 */ _W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */ _W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */ _W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */ _W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */ _W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */ _W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */ _W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */ _W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */ static inline int match_token(struct ts_fsm_token *t, u8 d) { if (t->type) return (token_lookup_tbl[d] & t->type) != 0; else return t->value == d; } static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state) { struct ts_fsm *fsm = ts_config_priv(conf); struct ts_fsm_token *cur = NULL, *next; unsigned int match_start, block_idx = 0, tok_idx; unsigned block_len = 0, strict, consumed = state->offset; const u8 *data; #define GET_NEXT_BLOCK() \ ({ consumed += block_idx; \ block_idx = 0; \ block_len = conf->get_next_block(consumed, &data, conf, state); }) #define TOKEN_MISMATCH() \ do { \ if (strict) \ goto no_match; \ block_idx++; \ goto startover; \ } while(0) #define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK()) if (end_of_data()) goto no_match; strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE; startover: match_start = consumed + block_idx; for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) { cur = &fsm->tokens[tok_idx]; if (likely(tok_idx < (fsm->ntokens - 1))) next = &fsm->tokens[tok_idx + 1]; else next = NULL; switch (cur->recur) { case TS_FSM_SINGLE: if (end_of_data()) goto no_match; if (!match_token(cur, data[block_idx])) TOKEN_MISMATCH(); break; case TS_FSM_PERHAPS: if (end_of_data() || !match_token(cur, data[block_idx])) continue; break; case TS_FSM_MULTI: if (end_of_data()) goto no_match; if (!match_token(cur, data[block_idx])) TOKEN_MISMATCH(); block_idx++; fallthrough; case TS_FSM_ANY: if (next == NULL) goto found_match; if (end_of_data()) continue; while (!match_token(next, data[block_idx])) { if (!match_token(cur, data[block_idx])) TOKEN_MISMATCH(); block_idx++; if (end_of_data()) goto no_match; } continue; /* * Optimization: Prefer small local loop over jumping * back and forth until garbage at head is munched. */ case TS_FSM_HEAD_IGNORE: if (end_of_data()) continue; while (!match_token(next, data[block_idx])) { /* * Special case, don't start over upon * a mismatch, give the user the * chance to specify the type of data * allowed to be ignored. */ if (!match_token(cur, data[block_idx])) goto no_match; block_idx++; if (end_of_data()) goto no_match; } match_start = consumed + block_idx; continue; } block_idx++; } if (end_of_data()) goto found_match; no_match: return UINT_MAX; found_match: state->offset = consumed + block_idx; return match_start; } static struct ts_config *fsm_init(const void *pattern, unsigned int len, gfp_t gfp_mask, int flags) { int i, err = -EINVAL; struct ts_config *conf; struct ts_fsm *fsm; struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern; unsigned int ntokens = len / sizeof(*tokens); size_t priv_size = sizeof(*fsm) + len; if (len % sizeof(struct ts_fsm_token) || ntokens < 1) goto errout; if (flags & TS_IGNORECASE) goto errout; for (i = 0; i < ntokens; i++) { struct ts_fsm_token *t = &tokens[i]; if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX) goto errout; if (t->recur == TS_FSM_HEAD_IGNORE && (i != 0 || i == (ntokens - 1))) goto errout; } conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) return conf; conf->flags = flags; fsm = ts_config_priv(conf); fsm->ntokens = ntokens; memcpy(fsm->tokens, pattern, len); for (i = 0; i < fsm->ntokens; i++) { struct ts_fsm_token *t = &fsm->tokens[i]; t->type = token_map[t->type]; } return conf; errout: return ERR_PTR(err); } static void *fsm_get_pattern(struct ts_config *conf) { struct ts_fsm *fsm = ts_config_priv(conf); return fsm->tokens; } static unsigned int fsm_get_pattern_len(struct ts_config *conf) { struct ts_fsm *fsm = ts_config_priv(conf); return fsm->ntokens * sizeof(struct ts_fsm_token); } static struct ts_ops fsm_ops = { .name = "fsm", .find = fsm_find, .init = fsm_init, .get_pattern = fsm_get_pattern, .get_pattern_len = fsm_get_pattern_len, .owner = THIS_MODULE, .list = LIST_HEAD_INIT(fsm_ops.list) }; static int __init init_fsm(void) { return textsearch_register(&fsm_ops); } static void __exit exit_fsm(void) { textsearch_unregister(&fsm_ops); } MODULE_DESCRIPTION("naive finite state machine text search"); MODULE_LICENSE("GPL"); module_init(init_fsm); module_exit(exit_fsm); |
| 1 1 2 32 89 21 52 7 62 16 8 9 21 20 18 2 10 8 15 156 36 155 71 71 71 3 69 71 101 101 123 124 62 19 55 49 49 49 49 39 27 27 24 24 24 68 1 21 49 8 31 25 33 33 25 155 1 24 1 48 6 44 1 1 49 45 6 45 45 45 2 50 1 49 49 45 6 6 8 8 8 33 33 33 6 33 40 40 4 40 3 33 33 33 3 33 6 33 2 33 32 1 33 33 2 24 3 54 2 3 16 9 26 23 26 22 6 13 37 9 9 9 9 9 13 13 85 12 29 29 66 66 60 60 39 17 2 39 2 24 37 7 7 1 1 1 10 9 29 3 3 11 8 1 7 1 14 29 45 7 9 27 3 31 29 18 18 17 4 1 11 12 2 4 5 6 1 4 10 10 11 1 1 34 2 2 2 65 66 2 7 2 38 25 58 58 49 57 11 46 3 2 1 22 22 2 2 18 1 1 1 1 1 1 8 2 2 3 13 64 6 16 16 3 1 1 1 2 1 13 1 2 3 2 3 2 2 2 1 3 2 13 2 1 12 12 12 2 14 14 130 29 2 1 3 4 4 3 64 28 1 5 5 1 3 5 3 8 6 2 54 5 55 55 5 55 54 85 85 67 9 46 35 2 32 11 24 34 43 18 16 9 1 2 2 2 1 3 1 3 3 57 39 42 1 16 39 2 2 36 3 1 2 37 4 4 11 5 8 2 5 26 27 1 13 13 2 12 12 1 13 13 13 13 12 12 2 2 1 13 13 63 63 10 67 41 66 8 12 10 5 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2009 Red Hat, Inc. * Copyright (C) 2006 Rusty Russell IBM Corporation * * Author: Michael S. Tsirkin <mst@redhat.com> * * Inspiration, some code, and most witty comments come from * Documentation/virtual/lguest/lguest.c, by Rusty Russell * * Generic code for virtio server in host kernel. */ #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/file.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/kthread.h> #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/vhost_task.h> #include <linux/interval_tree_generic.h> #include <linux/nospec.h> #include <linux/kcov.h> #include "vhost.h" static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); static int max_iotlb_entries = 2048; module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. (default: 2048)"); static bool fork_from_owner_default = VHOST_FORK_OWNER_TASK; #ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL module_param(fork_from_owner_default, bool, 0444); MODULE_PARM_DESC(fork_from_owner_default, "Set task mode as the default(default: Y)"); #endif enum { VHOST_MEMORY_F_LOG = 0x1, }; #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) { vq->user_be = true; } static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) { vq->user_be = false; } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; if (vq->private_data) return -EBUSY; if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; if (s.num != VHOST_VRING_LITTLE_ENDIAN && s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; if (s.num == VHOST_VRING_BIG_ENDIAN) vhost_enable_cross_endian_big(vq); else vhost_enable_cross_endian_little(vq); return 0; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { struct vhost_vring_state s = { .index = idx, .num = vq->user_be }; if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { /* Note for legacy virtio: user_be is initialized at reset time * according to the host endianness. If userspace does not set an * explicit endianness, the default behavior is native endian, as * expected by legacy virtio. */ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { return -ENOIOCTLCMD; } static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, int __user *argp) { return -ENOIOCTLCMD; } static void vhost_init_is_le(struct vhost_virtqueue *vq) { vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { vhost_init_is_le(vq); } struct vhost_flush_struct { struct vhost_work work; struct completion wait_event; }; static void vhost_flush_work(struct vhost_work *work) { struct vhost_flush_struct *s; s = container_of(work, struct vhost_flush_struct, work); complete(&s->wait_event); } static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct vhost_poll *poll; poll = container_of(pt, struct vhost_poll, table); poll->wqh = wqh; add_wait_queue(wqh, &poll->wait); } static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; if (!poll->dev->use_worker) work->fn(work); else vhost_poll_queue(poll); return 0; } void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; } EXPORT_SYMBOL_GPL(vhost_work_init); /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev, struct vhost_virtqueue *vq) { init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; poll->wqh = NULL; poll->vq = vq; vhost_work_init(&poll->work, fn); } EXPORT_SYMBOL_GPL(vhost_poll_init); /* Start polling a file. We add ourselves to file's wait queue. The caller must * keep a reference to a file until after vhost_poll_stop is called. */ int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; if (poll->wqh) return 0; mask = vfs_poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); return -EINVAL; } return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); /* Stop polling a file. After this function returns, it becomes safe to drop the * file reference. You must also flush afterwards. */ void vhost_poll_stop(struct vhost_poll *poll) { if (poll->wqh) { remove_wait_queue(poll->wqh, &poll->wait); poll->wqh = NULL; } } EXPORT_SYMBOL_GPL(vhost_poll_stop); static void vhost_worker_queue(struct vhost_worker *worker, struct vhost_work *work) { if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { /* We can only add the work to the list after we're * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &worker->work_list); worker->ops->wakeup(worker); } } bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) { struct vhost_worker *worker; bool queued = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker) { queued = true; vhost_worker_queue(worker, work); } rcu_read_unlock(); return queued; } EXPORT_SYMBOL_GPL(vhost_vq_work_queue); /** * __vhost_worker_flush - flush a worker * @worker: worker to flush * * The worker's flush_mutex must be held. */ static void __vhost_worker_flush(struct vhost_worker *worker) { struct vhost_flush_struct flush; if (!worker->attachment_cnt || worker->killed) return; init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); vhost_worker_queue(worker, &flush.work); /* * Drop mutex in case our worker is killed and it needs to take the * mutex to force cleanup. */ mutex_unlock(&worker->mutex); wait_for_completion(&flush.wait_event); mutex_lock(&worker->mutex); } static void vhost_worker_flush(struct vhost_worker *worker) { mutex_lock(&worker->mutex); __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); } void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_worker *worker; unsigned long i; xa_for_each(&dev->worker_xa, i, worker) vhost_worker_flush(worker); } EXPORT_SYMBOL_GPL(vhost_dev_flush); /* A lockless hint for busy polling code to exit the loop */ bool vhost_vq_has_work(struct vhost_virtqueue *vq) { struct vhost_worker *worker; bool has_work = false; rcu_read_lock(); worker = rcu_dereference(vq->worker); if (worker && !llist_empty(&worker->work_list)) has_work = true; rcu_read_unlock(); return has_work; } EXPORT_SYMBOL_GPL(vhost_vq_has_work); void vhost_poll_queue(struct vhost_poll *poll) { vhost_vq_work_queue(poll->vq, &poll->work); } EXPORT_SYMBOL_GPL(vhost_poll_queue); static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) { int j; for (j = 0; j < VHOST_NUM_ADDRS; j++) vq->meta_iotlb[j] = NULL; } static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; for (i = 0; i < d->nvqs; ++i) __vhost_vq_meta_reset(d->vqs[i]); } static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) { call_ctx->ctx = NULL; memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); } bool vhost_vq_is_setup(struct vhost_virtqueue *vq) { return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); } EXPORT_SYMBOL_GPL(vhost_vq_is_setup); static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { vq->num = 1; vq->desc = NULL; vq->avail = NULL; vq->used = NULL; vq->last_avail_idx = 0; vq->next_avail_head = 0; vq->avail_idx = 0; vq->last_used_idx = 0; vq->signalled_used = 0; vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; vq->private_data = NULL; virtio_features_zero(vq->acked_features_array); vq->acked_backend_features = 0; vq->log_base = NULL; vq->error_ctx = NULL; vq->kick = NULL; vq->log_ctx = NULL; vhost_disable_cross_endian(vq); vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; rcu_assign_pointer(vq->worker, NULL); vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } static int vhost_run_work_kthread_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct vhost_dev *dev = worker->dev; struct llist_node *node; kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); break; } node = llist_del_all(&worker->work_list); if (!node) schedule(); node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); cond_resched(); } } kthread_unuse_mm(dev->mm); return 0; } static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct llist_node *node; node = llist_del_all(&worker->work_list); if (node) { __set_current_state(TASK_RUNNING); node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); cond_resched(); } } return !!node; } static void vhost_worker_killed(void *data) { struct vhost_worker *worker = data; struct vhost_dev *dev = worker->dev; struct vhost_virtqueue *vq; int i, attach_cnt = 0; mutex_lock(&worker->mutex); worker->killed = true; for (i = 0; i < dev->nvqs; i++) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); if (worker == rcu_dereference_check(vq->worker, lockdep_is_held(&vq->mutex))) { rcu_assign_pointer(vq->worker, NULL); attach_cnt++; } mutex_unlock(&vq->mutex); } worker->attachment_cnt -= attach_cnt; if (attach_cnt) synchronize_rcu(); /* * Finish vhost_worker_flush calls and any other works that snuck in * before the synchronize_rcu. */ vhost_run_work_list(worker); mutex_unlock(&worker->mutex); } static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) { kfree(vq->indirect); vq->indirect = NULL; kfree(vq->log); vq->log = NULL; kfree(vq->heads); vq->heads = NULL; kfree(vq->nheads); vq->nheads = NULL; } /* Helper to allocate iovec buffers for all vqs. */ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { struct vhost_virtqueue *vq; int i; for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->indirect = kmalloc_array(UIO_MAXIOV, sizeof(*vq->indirect), GFP_KERNEL); vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), GFP_KERNEL); vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), GFP_KERNEL); vq->nheads = kmalloc_array(dev->iov_limit, sizeof(*vq->nheads), GFP_KERNEL); if (!vq->indirect || !vq->log || !vq->heads || !vq->nheads) goto err_nomem; } return 0; err_nomem: for (; i >= 0; --i) vhost_vq_free_iovecs(dev->vqs[i]); return -ENOMEM; } static void vhost_dev_free_iovecs(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) vhost_vq_free_iovecs(dev->vqs[i]); } bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len) { struct vhost_dev *dev = vq->dev; if ((dev->byte_weight && total_len >= dev->byte_weight) || pkts >= dev->weight) { vhost_poll_queue(&vq->poll); return true; } return false; } EXPORT_SYMBOL_GPL(vhost_exceeds_weight); static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, unsigned int num) { size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return size_add(struct_size(vq->avail, ring, num), event); } static size_t vhost_get_used_size(struct vhost_virtqueue *vq, unsigned int num) { size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return size_add(struct_size(vq->used, ring, num), event); } static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, unsigned int num) { return sizeof(*vq->desc) * num; } void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, bool use_worker, int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg)) { struct vhost_virtqueue *vq; int i; dev->vqs = vqs; dev->nvqs = nvqs; mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; dev->use_worker = use_worker; dev->msg_handler = msg_handler; dev->fork_owner = fork_from_owner_default; init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; vq->nheads = NULL; vq->dev = dev; mutex_init(&vq->mutex); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, EPOLLIN, dev, vq); } } EXPORT_SYMBOL_GPL(vhost_dev_init); /* Caller should have device mutex */ long vhost_dev_check_owner(struct vhost_dev *dev) { /* Are you the owner? If not, I don't think you mean to do that */ return dev->mm == current->mm ? 0 : -EPERM; } EXPORT_SYMBOL_GPL(vhost_dev_check_owner); struct vhost_attach_cgroups_struct { struct vhost_work work; struct task_struct *owner; int ret; }; static void vhost_attach_cgroups_work(struct vhost_work *work) { struct vhost_attach_cgroups_struct *s; s = container_of(work, struct vhost_attach_cgroups_struct, work); s->ret = cgroup_attach_task_all(s->owner, current); } static int vhost_attach_task_to_cgroups(struct vhost_worker *worker) { struct vhost_attach_cgroups_struct attach; int saved_cnt; attach.owner = current; vhost_work_init(&attach.work, vhost_attach_cgroups_work); vhost_worker_queue(worker, &attach.work); mutex_lock(&worker->mutex); /* * Bypass attachment_cnt check in __vhost_worker_flush: * Temporarily change it to INT_MAX to bypass the check */ saved_cnt = worker->attachment_cnt; worker->attachment_cnt = INT_MAX; __vhost_worker_flush(worker); worker->attachment_cnt = saved_cnt; mutex_unlock(&worker->mutex); return attach.ret; } /* Caller should have device mutex */ bool vhost_dev_has_owner(struct vhost_dev *dev) { return dev->mm; } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); static void vhost_attach_mm(struct vhost_dev *dev) { /* No owner, become one */ if (dev->use_worker) { dev->mm = get_task_mm(current); } else { /* vDPA device does not use worker thread, so there's * no need to hold the address space for mm. This helps * to avoid deadlock in the case of mmap() which may * hold the refcnt of the file and depends on release * method to remove vma. */ dev->mm = current->mm; mmgrab(dev->mm); } } static void vhost_detach_mm(struct vhost_dev *dev) { if (!dev->mm) return; if (dev->use_worker) mmput(dev->mm); else mmdrop(dev->mm); dev->mm = NULL; } static void vhost_worker_destroy(struct vhost_dev *dev, struct vhost_worker *worker) { if (!worker) return; WARN_ON(!llist_empty(&worker->work_list)); xa_erase(&dev->worker_xa, worker->id); worker->ops->stop(worker); kfree(worker); } static void vhost_workers_free(struct vhost_dev *dev) { struct vhost_worker *worker; unsigned long i; if (!dev->use_worker) return; for (i = 0; i < dev->nvqs; i++) rcu_assign_pointer(dev->vqs[i]->worker, NULL); /* * Free the default worker we created and cleanup workers userspace * created but couldn't clean up (it forgot or crashed). */ xa_for_each(&dev->worker_xa, i, worker) vhost_worker_destroy(dev, worker); xa_destroy(&dev->worker_xa); } static void vhost_task_wakeup(struct vhost_worker *worker) { return vhost_task_wake(worker->vtsk); } static void vhost_kthread_wakeup(struct vhost_worker *worker) { wake_up_process(worker->kthread_task); } static void vhost_task_do_stop(struct vhost_worker *worker) { return vhost_task_stop(worker->vtsk); } static void vhost_kthread_do_stop(struct vhost_worker *worker) { kthread_stop(worker->kthread_task); } static int vhost_task_worker_create(struct vhost_worker *worker, struct vhost_dev *dev, const char *name) { struct vhost_task *vtsk; u32 id; int ret; vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); if (IS_ERR(vtsk)) return PTR_ERR(vtsk); worker->vtsk = vtsk; vhost_task_start(vtsk); ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); if (ret < 0) { vhost_task_do_stop(worker); return ret; } worker->id = id; return 0; } static int vhost_kthread_worker_create(struct vhost_worker *worker, struct vhost_dev *dev, const char *name) { struct task_struct *task; u32 id; int ret; task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name); if (IS_ERR(task)) return PTR_ERR(task); worker->kthread_task = task; wake_up_process(task); ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); if (ret < 0) goto stop_worker; ret = vhost_attach_task_to_cgroups(worker); if (ret) goto stop_worker; worker->id = id; return 0; stop_worker: vhost_kthread_do_stop(worker); return ret; } static const struct vhost_worker_ops kthread_ops = { .create = vhost_kthread_worker_create, .stop = vhost_kthread_do_stop, .wakeup = vhost_kthread_wakeup, }; static const struct vhost_worker_ops vhost_task_ops = { .create = vhost_task_worker_create, .stop = vhost_task_do_stop, .wakeup = vhost_task_wakeup, }; static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { struct vhost_worker *worker; char name[TASK_COMM_LEN]; int ret; const struct vhost_worker_ops *ops = dev->fork_owner ? &vhost_task_ops : &kthread_ops; worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); if (!worker) return NULL; worker->dev = dev; worker->ops = ops; snprintf(name, sizeof(name), "vhost-%d", current->pid); mutex_init(&worker->mutex); init_llist_head(&worker->work_list); worker->kcov_handle = kcov_common_handle(); ret = ops->create(worker, dev, name); if (ret < 0) goto free_worker; return worker; free_worker: kfree(worker); return NULL; } /* Caller must have device mutex */ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, struct vhost_worker *worker) { struct vhost_worker *old_worker; mutex_lock(&worker->mutex); if (worker->killed) { mutex_unlock(&worker->mutex); return; } mutex_lock(&vq->mutex); old_worker = rcu_dereference_check(vq->worker, lockdep_is_held(&vq->mutex)); rcu_assign_pointer(vq->worker, worker); worker->attachment_cnt++; if (!old_worker) { mutex_unlock(&vq->mutex); mutex_unlock(&worker->mutex); return; } mutex_unlock(&vq->mutex); mutex_unlock(&worker->mutex); /* * Take the worker mutex to make sure we see the work queued from * device wide flushes which doesn't use RCU for execution. */ mutex_lock(&old_worker->mutex); if (old_worker->killed) { mutex_unlock(&old_worker->mutex); return; } /* * We don't want to call synchronize_rcu for every vq during setup * because it will slow down VM startup. If we haven't done * VHOST_SET_VRING_KICK and not done the driver specific * SET_ENDPOINT/RUNNING then we can skip the sync since there will * not be any works queued for scsi and net. */ mutex_lock(&vq->mutex); if (!vhost_vq_get_backend(vq) && !vq->kick) { mutex_unlock(&vq->mutex); old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); /* * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. * Warn if it adds support for multiple workers but forgets to * handle the early queueing case. */ WARN_ON(!old_worker->attachment_cnt && !llist_empty(&old_worker->work_list)); return; } mutex_unlock(&vq->mutex); /* Make sure new vq queue/flush/poll calls see the new worker */ synchronize_rcu(); /* Make sure whatever was queued gets run */ __vhost_worker_flush(old_worker); old_worker->attachment_cnt--; mutex_unlock(&old_worker->mutex); } /* Caller must have device mutex */ static int vhost_vq_attach_worker(struct vhost_virtqueue *vq, struct vhost_vring_worker *info) { unsigned long index = info->worker_id; struct vhost_dev *dev = vq->dev; struct vhost_worker *worker; if (!dev->use_worker) return -EINVAL; worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); if (!worker || worker->id != info->worker_id) return -ENODEV; __vhost_vq_attach_worker(vq, worker); return 0; } /* Caller must have device mutex */ static int vhost_new_worker(struct vhost_dev *dev, struct vhost_worker_state *info) { struct vhost_worker *worker; worker = vhost_worker_create(dev); if (!worker) return -ENOMEM; info->worker_id = worker->id; return 0; } /* Caller must have device mutex */ static int vhost_free_worker(struct vhost_dev *dev, struct vhost_worker_state *info) { unsigned long index = info->worker_id; struct vhost_worker *worker; worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); if (!worker || worker->id != info->worker_id) return -ENODEV; mutex_lock(&worker->mutex); if (worker->attachment_cnt || worker->killed) { mutex_unlock(&worker->mutex); return -EBUSY; } /* * A flush might have raced and snuck in before attachment_cnt was set * to zero. Make sure flushes are flushed from the queue before * freeing. */ __vhost_worker_flush(worker); mutex_unlock(&worker->mutex); vhost_worker_destroy(dev, worker); return 0; } static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp, struct vhost_virtqueue **vq, u32 *id) { u32 __user *idxp = argp; u32 idx; long r; r = get_user(idx, idxp); if (r < 0) return r; if (idx >= dev->nvqs) return -ENOBUFS; idx = array_index_nospec(idx, dev->nvqs); *vq = dev->vqs[idx]; *id = idx; return 0; } /* Caller must have device mutex */ long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, void __user *argp) { struct vhost_vring_worker ring_worker; struct vhost_worker_state state; struct vhost_worker *worker; struct vhost_virtqueue *vq; long ret; u32 idx; if (!dev->use_worker) return -EINVAL; if (!vhost_dev_has_owner(dev)) return -EINVAL; ret = vhost_dev_check_owner(dev); if (ret) return ret; switch (ioctl) { /* dev worker ioctls */ case VHOST_NEW_WORKER: /* * vhost_tasks will account for worker threads under the parent's * NPROC value but kthreads do not. To avoid userspace overflowing * the system with worker threads fork_owner must be true. */ if (!dev->fork_owner) return -EFAULT; ret = vhost_new_worker(dev, &state); if (!ret && copy_to_user(argp, &state, sizeof(state))) ret = -EFAULT; return ret; case VHOST_FREE_WORKER: if (copy_from_user(&state, argp, sizeof(state))) return -EFAULT; return vhost_free_worker(dev, &state); /* vring worker ioctls */ case VHOST_ATTACH_VRING_WORKER: case VHOST_GET_VRING_WORKER: break; default: return -ENOIOCTLCMD; } ret = vhost_get_vq_from_user(dev, argp, &vq, &idx); if (ret) return ret; switch (ioctl) { case VHOST_ATTACH_VRING_WORKER: if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) { ret = -EFAULT; break; } ret = vhost_vq_attach_worker(vq, &ring_worker); break; case VHOST_GET_VRING_WORKER: worker = rcu_dereference_check(vq->worker, lockdep_is_held(&dev->mutex)); if (!worker) { ret = -EINVAL; break; } ring_worker.index = idx; ring_worker.worker_id = worker->id; if (copy_to_user(argp, &ring_worker, sizeof(ring_worker))) ret = -EFAULT; break; default: ret = -ENOIOCTLCMD; break; } return ret; } EXPORT_SYMBOL_GPL(vhost_worker_ioctl); /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { struct vhost_worker *worker; int err, i; /* Is there an owner already? */ if (vhost_dev_has_owner(dev)) { err = -EBUSY; goto err_mm; } vhost_attach_mm(dev); err = vhost_dev_alloc_iovecs(dev); if (err) goto err_iovecs; if (dev->use_worker) { /* * This should be done last, because vsock can queue work * before VHOST_SET_OWNER so it simplifies the failure path * below since we don't have to worry about vsock queueing * while we free the worker. */ worker = vhost_worker_create(dev); if (!worker) { err = -ENOMEM; goto err_worker; } for (i = 0; i < dev->nvqs; i++) __vhost_vq_attach_worker(dev->vqs[i], worker); } return 0; err_worker: vhost_dev_free_iovecs(dev); err_iovecs: vhost_detach_mm(dev); err_mm: return err; } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); static struct vhost_iotlb *iotlb_alloc(void) { return vhost_iotlb_alloc(max_iotlb_entries, VHOST_IOTLB_FLAG_RETIRE); } struct vhost_iotlb *vhost_dev_reset_owner_prepare(void) { return iotlb_alloc(); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) { int i; vhost_dev_cleanup(dev); dev->fork_owner = fork_from_owner_default; dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); void vhost_dev_stop(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) vhost_poll_stop(&dev->vqs[i]->poll); } vhost_dev_flush(dev); } EXPORT_SYMBOL_GPL(vhost_dev_stop); void vhost_clear_msg(struct vhost_dev *dev) { struct vhost_msg_node *node, *n; spin_lock(&dev->iotlb_lock); list_for_each_entry_safe(node, n, &dev->read_list, node) { list_del(&node->node); kfree(node); } list_for_each_entry_safe(node, n, &dev->pending_list, node) { list_del(&node->node); kfree(node); } spin_unlock(&dev->iotlb_lock); } EXPORT_SYMBOL_GPL(vhost_clear_msg); void vhost_dev_cleanup(struct vhost_dev *dev) { int i; for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->error_ctx) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) fput(dev->vqs[i]->kick); if (dev->vqs[i]->call_ctx.ctx) eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); if (dev->log_ctx) eventfd_ctx_put(dev->log_ctx); dev->log_ctx = NULL; /* No one will access memory at this point */ vhost_iotlb_free(dev->umem); dev->umem = NULL; vhost_iotlb_free(dev->iotlb); dev->iotlb = NULL; vhost_clear_msg(dev); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); vhost_workers_free(dev); vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) { u64 a = addr / VHOST_PAGE_SIZE / 8; /* Make sure 64 bit math will not overflow. */ if (a > ULONG_MAX - (unsigned long)log_base || a + (unsigned long)log_base > ULONG_MAX) return false; return access_ok(log_base + a, (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } /* Make sure 64 bit math will not overflow. */ static bool vhost_overflow(u64 uaddr, u64 size) { if (uaddr > ULONG_MAX || size > ULONG_MAX) return true; if (!size) return false; return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, int log_all) { struct vhost_iotlb_map *map; if (!umem) return false; list_for_each_entry(map, &umem->list, link) { unsigned long a = map->addr; if (vhost_overflow(map->addr, map->size)) return false; if (!access_ok((void __user *)a, map->size)) return false; else if (log_all && !log_access_ok(log_base, map->start, map->size)) return false; } return true; } static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, u64 addr, unsigned int size, int type) { const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; if (!map) return NULL; return (void __user *)(uintptr_t)(map->addr + addr - map->start); } /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, int log_all) { int i; for (i = 0; i < d->nvqs; ++i) { bool ok; bool log; mutex_lock(&d->vqs[i]->mutex); log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) ok = vq_memory_access_ok(d->vqs[i]->log_base, umem, log); else ok = true; mutex_unlock(&d->vqs[i]->mutex); if (!ok) return false; } return true; } static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access); static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, const void *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_to_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that all vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ struct iov_iter t; void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)to, size, VHOST_ADDR_USED); if (uaddr) return __copy_to_user(uaddr, from, size); ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); if (ret < 0) goto out; iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size); ret = copy_to_iter(from, size, &t); if (ret == size) ret = 0; } out: return ret; } static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, void __user *from, unsigned size) { int ret; if (!vq->iotlb) return __copy_from_user(to, from, size); else { /* This function should be called after iotlb * prefetch, which means we're sure that vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)from, size, VHOST_ADDR_DESC); struct iov_iter f; if (uaddr) return __copy_from_user(to, uaddr, size); ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", from, (unsigned long long) size); goto out; } iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size); ret = copy_from_iter(to, size, &f); if (ret == size) ret = 0; } out: return ret; } static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, void __user *addr, unsigned int size, int type) { int ret; ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); if (ret < 0) { vq_err(vq, "IOTLB translation failure: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { vq_err(vq, "Non atomic userspace memory access: uaddr " "%p size 0x%llx\n", addr, (unsigned long long) size); return NULL; } return vq->iotlb_iov[0].iov_base; } /* This function should be called after iotlb * prefetch, which means we're sure that vq * could be access through iotlb. So -EAGAIN should * not happen in this case. */ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, void __user *addr, unsigned int size, int type) { void __user *uaddr = vhost_vq_meta_fetch(vq, (u64)(uintptr_t)addr, size, type); if (uaddr) return uaddr; return __vhost_get_user_slow(vq, addr, size, type); } #define vhost_put_user(vq, x, ptr) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ __typeof__(ptr) to = \ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ sizeof(*ptr), VHOST_ADDR_USED); \ if (to != NULL) \ ret = __put_user(x, to); \ else \ ret = -EFAULT; \ } \ ret; \ }) static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)); } static inline int vhost_put_used(struct vhost_virtqueue *vq, struct vring_used_elem *head, int idx, int count) { return vhost_copy_to_user(vq, vq->used->ring + idx, head, count * sizeof(*head)); } static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags); } static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) { return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx); } #define vhost_get_user(vq, x, ptr, type) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __get_user(x, ptr); \ } else { \ __typeof__(ptr) from = \ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ sizeof(*ptr), \ type); \ if (from != NULL) \ ret = __get_user(x, from); \ else \ ret = -EFAULT; \ } \ ret; \ }) #define vhost_get_avail(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) #define vhost_get_used(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) mutex_unlock(&d->vqs[i]->mutex); } static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq) { __virtio16 idx; int r; r = vhost_get_avail(vq, idx, &vq->avail->idx); if (unlikely(r < 0)) { vq_err(vq, "Failed to access available index at %p (%d)\n", &vq->avail->idx, r); return r; } /* Check it isn't doing very strange thing with available indexes */ vq->avail_idx = vhost16_to_cpu(vq, idx); if (unlikely((u16)(vq->avail_idx - vq->last_avail_idx) > vq->num)) { vq_err(vq, "Invalid available index change from %u to %u", vq->last_avail_idx, vq->avail_idx); return -EINVAL; } /* We're done if there is nothing new */ if (vq->avail_idx == vq->last_avail_idx) return 0; /* * We updated vq->avail_idx so we need a memory barrier between * the index read above and the caller reading avail ring entries. */ smp_rmb(); return 1; } static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, __virtio16 *head, int idx) { return vhost_get_avail(vq, *head, &vq->avail->ring[idx & (vq->num - 1)]); } static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, __virtio16 *flags) { return vhost_get_avail(vq, *flags, &vq->avail->flags); } static inline int vhost_get_used_event(struct vhost_virtqueue *vq, __virtio16 *event) { return vhost_get_avail(vq, *event, vhost_used_event(vq)); } static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { return vhost_get_used(vq, *idx, &vq->used->idx); } static inline int vhost_get_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, int idx) { return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); } static void vhost_iotlb_notify_vq(struct vhost_dev *d, struct vhost_iotlb_msg *msg) { struct vhost_msg_node *node, *n; spin_lock(&d->iotlb_lock); list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); kfree(node); } } spin_unlock(&d->iotlb_lock); } static bool umem_access_ok(u64 uaddr, u64 size, int access) { unsigned long a = uaddr; /* Make sure 64 bit math will not overflow. */ if (vhost_overflow(uaddr, size)) return false; if ((access & VHOST_ACCESS_RO) && !access_ok((void __user *)a, size)) return false; if ((access & VHOST_ACCESS_WO) && !access_ok((void __user *)a, size)) return false; return true; } static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { int ret = 0; if (asid != 0) return -EINVAL; mutex_lock(&dev->mutex); vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: if (!dev->iotlb) { ret = -EFAULT; break; } if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) { ret = -EFAULT; break; } vhost_vq_meta_reset(dev); if (vhost_iotlb_add_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1, msg->uaddr, msg->perm)) { ret = -ENOMEM; break; } vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: if (!dev->iotlb) { ret = -EFAULT; break; } vhost_vq_meta_reset(dev); vhost_iotlb_del_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); break; default: ret = -EINVAL; break; } vhost_dev_unlock_vqs(dev); mutex_unlock(&dev->mutex); return ret; } ssize_t vhost_chr_write_iter(struct vhost_dev *dev, struct iov_iter *from) { struct vhost_iotlb_msg msg; size_t offset; int type, ret; u32 asid = 0; ret = copy_from_iter(&type, sizeof(type), from); if (ret != sizeof(type)) { ret = -EINVAL; goto done; } switch (type) { case VHOST_IOTLB_MSG: /* There maybe a hole after type for V1 message type, * so skip it here. */ offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); break; case VHOST_IOTLB_MSG_V2: if (vhost_backend_has_feature(dev->vqs[0], VHOST_BACKEND_F_IOTLB_ASID)) { ret = copy_from_iter(&asid, sizeof(asid), from); if (ret != sizeof(asid)) { ret = -EINVAL; goto done; } offset = 0; } else offset = sizeof(__u32); break; default: ret = -EINVAL; goto done; } iov_iter_advance(from, offset); ret = copy_from_iter(&msg, sizeof(msg), from); if (ret != sizeof(msg)) { ret = -EINVAL; goto done; } if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) { ret = -EINVAL; goto done; } if (dev->msg_handler) ret = dev->msg_handler(dev, asid, &msg); else ret = vhost_process_iotlb_msg(dev, asid, &msg); if (ret) { ret = -EFAULT; goto done; } ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) : sizeof(struct vhost_msg_v2); done: return ret; } EXPORT_SYMBOL(vhost_chr_write_iter); __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, poll_table *wait) { __poll_t mask = 0; poll_wait(file, &dev->wait, wait); if (!list_empty(&dev->read_list)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } EXPORT_SYMBOL(vhost_chr_poll); ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, int noblock) { DEFINE_WAIT(wait); struct vhost_msg_node *node; ssize_t ret = 0; unsigned size = sizeof(struct vhost_msg); if (iov_iter_count(to) < size) return 0; while (1) { if (!noblock) prepare_to_wait(&dev->wait, &wait, TASK_INTERRUPTIBLE); node = vhost_dequeue_msg(dev, &dev->read_list); if (node) break; if (noblock) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!dev->iotlb) { ret = -EBADFD; break; } schedule(); } if (!noblock) finish_wait(&dev->wait, &wait); if (node) { struct vhost_iotlb_msg *msg; void *start = &node->msg; switch (node->msg.type) { case VHOST_IOTLB_MSG: size = sizeof(node->msg); msg = &node->msg.iotlb; break; case VHOST_IOTLB_MSG_V2: size = sizeof(node->msg_v2); msg = &node->msg_v2.iotlb; break; default: BUG(); break; } ret = copy_to_iter(start, size, to); if (ret != size || msg->type != VHOST_IOTLB_MISS) { kfree(node); return ret; } vhost_enqueue_msg(dev, &dev->pending_list, node); } return ret; } EXPORT_SYMBOL_GPL(vhost_chr_read_iter); static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) { struct vhost_dev *dev = vq->dev; struct vhost_msg_node *node; struct vhost_iotlb_msg *msg; bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2); node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG); if (!node) return -ENOMEM; if (v2) { node->msg_v2.type = VHOST_IOTLB_MSG_V2; msg = &node->msg_v2.iotlb; } else { msg = &node->msg.iotlb; } msg->type = VHOST_IOTLB_MISS; msg->iova = iova; msg->perm = access; vhost_enqueue_msg(dev, &dev->read_list, node); return 0; } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, vring_desc_t __user *desc, vring_avail_t __user *avail, vring_used_t __user *used) { /* If an IOTLB device is present, the vring addresses are * GIOVAs. Access validation occurs at prefetch time. */ if (vq->iotlb) return true; return access_ok(desc, vhost_get_desc_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(used, vhost_get_used_size(vq, num)); } static void vhost_vq_meta_update(struct vhost_virtqueue *vq, const struct vhost_iotlb_map *map, int type) { int access = (type == VHOST_ADDR_USED) ? VHOST_ACCESS_WO : VHOST_ACCESS_RO; if (likely(map->perm & access)) vq->meta_iotlb[type] = map; } static bool iotlb_access_ok(struct vhost_virtqueue *vq, int access, u64 addr, u64 len, int type) { const struct vhost_iotlb_map *map; struct vhost_iotlb *umem = vq->iotlb; u64 s = 0, size, orig_addr = addr, last = addr + len - 1; if (vhost_vq_meta_fetch(vq, addr, len, type)) return true; while (len > s) { map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { vhost_iotlb_miss(vq, addr, access); return false; } else if (!(map->perm & access)) { /* Report the possible access violation by * request another translation from userspace. */ return false; } size = map->size - addr + map->start; if (orig_addr == addr && size >= len) vhost_vq_meta_update(vq, map, type); s += size; addr += size; } return true; } int vq_meta_prefetch(struct vhost_virtqueue *vq) { unsigned int num = vq->num; if (!vq->iotlb) return 1; return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, vhost_get_avail_size(vq, num), VHOST_ADDR_AVAIL) && iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, vhost_get_used_size(vq, num), VHOST_ADDR_USED); } EXPORT_SYMBOL_GPL(vq_meta_prefetch); /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ bool vhost_log_access_ok(struct vhost_dev *dev) { return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, void __user *log_base, bool log_used, u64 log_addr) { /* If an IOTLB device is present, log_addr is a GIOVA that * will never be logged by log_used(). */ if (vq->iotlb) return true; return !log_used || log_access_ok(log_base, log_addr, vhost_get_used_size(vq, vq->num)); } /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ static bool vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); } /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) { if (!vq_log_access_ok(vq, vq->log_base)) return false; return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem; struct vhost_memory_region *region; struct vhost_iotlb *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; if (copy_from_user(&mem, m, size)) return -EFAULT; if (mem.padding) return -EOPNOTSUPP; if (mem.nregions > max_mem_regions) return -E2BIG; newmem = kvzalloc(struct_size(newmem, regions, mem.nregions), GFP_KERNEL); if (!newmem) return -ENOMEM; memcpy(newmem, &mem, size); if (copy_from_user(newmem->regions, m->regions, flex_array_size(newmem, regions, mem.nregions))) { kvfree(newmem); return -EFAULT; } newumem = iotlb_alloc(); if (!newumem) { kvfree(newmem); return -ENOMEM; } for (region = newmem->regions; region < newmem->regions + mem.nregions; region++) { if (vhost_iotlb_add_range(newumem, region->guest_phys_addr, region->guest_phys_addr + region->memory_size - 1, region->userspace_addr, VHOST_MAP_RW)) goto err; } if (!memory_access_ok(d, newumem, 0)) goto err; oldumem = d->umem; d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } kvfree(newmem); vhost_iotlb_free(oldumem); return 0; err: vhost_iotlb_free(newumem); kvfree(newmem); return -EFAULT; } static long vhost_vring_set_num(struct vhost_dev *d, struct vhost_virtqueue *vq, void __user *argp) { struct vhost_vring_state s; /* Resizing ring with an active backend? * You don't want to do that. */ if (vq->private_data) return -EBUSY; if (copy_from_user(&s, argp, sizeof s)) return -EFAULT; if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) return -EINVAL; vq->num = s.num; return 0; } static long vhost_vring_set_addr(struct vhost_dev *d, struct vhost_virtqueue *vq, void __user *argp) { struct vhost_vring_addr a; if (copy_from_user(&a, argp, sizeof a)) return -EFAULT; if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) return -EOPNOTSUPP; /* For 32bit, verify that the top 32bits of the user data are set to zero. */ if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || (u64)(unsigned long)a.used_user_addr != a.used_user_addr || (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) return -EFAULT; /* Make sure it's safe to cast pointers to vring types. */ BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) return -EINVAL; /* We only verify access here if backend is configured. * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { if (!vq_access_ok(vq, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) return -EINVAL; /* Also validate log access for used ring if enabled. */ if (!vq_log_used_access_ok(vq, vq->log_base, a.flags & (0x1 << VHOST_VRING_F_LOG), a.log_guest_addr)) return -EINVAL; } vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); vq->desc = (void __user *)(unsigned long)a.desc_user_addr; vq->avail = (void __user *)(unsigned long)a.avail_user_addr; vq->log_addr = a.log_guest_addr; vq->used = (void __user *)(unsigned long)a.used_user_addr; return 0; } static long vhost_vring_set_num_addr(struct vhost_dev *d, struct vhost_virtqueue *vq, unsigned int ioctl, void __user *argp) { long r; mutex_lock(&vq->mutex); switch (ioctl) { case VHOST_SET_VRING_NUM: r = vhost_vring_set_num(d, vq, argp); break; case VHOST_SET_VRING_ADDR: r = vhost_vring_set_addr(d, vq, argp); break; default: BUG(); } mutex_unlock(&vq->mutex); return r; } long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; bool pollstart = false, pollstop = false; struct eventfd_ctx *ctx = NULL; struct vhost_virtqueue *vq; struct vhost_vring_state s; struct vhost_vring_file f; u32 idx; long r; r = vhost_get_vq_from_user(d, argp, &vq, &idx); if (r < 0) return r; if (ioctl == VHOST_SET_VRING_NUM || ioctl == VHOST_SET_VRING_ADDR) { return vhost_vring_set_num_addr(d, vq, ioctl, argp); } mutex_lock(&vq->mutex); switch (ioctl) { case VHOST_SET_VRING_BASE: /* Moving base with an active backend? * You don't want to do that. */ if (vq->private_data) { r = -EBUSY; break; } if (copy_from_user(&s, argp, sizeof s)) { r = -EFAULT; break; } if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { vq->next_avail_head = vq->last_avail_idx = s.num & 0xffff; vq->last_used_idx = (s.num >> 16) & 0xffff; } else { if (s.num > 0xffff) { r = -EINVAL; break; } vq->next_avail_head = vq->last_avail_idx = s.num; } /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; break; case VHOST_GET_VRING_BASE: s.index = idx; if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); else s.num = vq->last_avail_idx; if (copy_to_user(argp, &s, sizeof s)) r = -EFAULT; break; case VHOST_SET_VRING_KICK: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; } if (eventfp != vq->kick) { pollstop = (filep = vq->kick) != NULL; pollstart = (vq->kick = eventfp) != NULL; } else filep = eventfp; break; case VHOST_SET_VRING_CALL: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, vq->call_ctx.ctx); break; case VHOST_SET_VRING_ERR: if (copy_from_user(&f, argp, sizeof f)) { r = -EFAULT; break; } ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, vq->error_ctx); break; case VHOST_SET_VRING_ENDIAN: r = vhost_set_vring_endian(vq, argp); break; case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: if (copy_from_user(&s, argp, sizeof(s))) { r = -EFAULT; break; } vq->busyloop_timeout = s.num; break; case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: s.index = idx; s.num = vq->busyloop_timeout; if (copy_to_user(argp, &s, sizeof(s))) r = -EFAULT; break; default: r = -ENOIOCTLCMD; } if (pollstop && vq->handle_kick) vhost_poll_stop(&vq->poll); if (!IS_ERR_OR_NULL(ctx)) eventfd_ctx_put(ctx); if (filep) fput(filep); if (pollstart && vq->handle_kick) r = vhost_poll_start(&vq->poll, vq->kick); mutex_unlock(&vq->mutex); if (pollstop && vq->handle_kick) vhost_dev_flush(vq->poll.dev); return r; } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); int vhost_init_device_iotlb(struct vhost_dev *d) { struct vhost_iotlb *niotlb, *oiotlb; int i; niotlb = iotlb_alloc(); if (!niotlb) return -ENOMEM; oiotlb = d->iotlb; d->iotlb = niotlb; for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq = d->vqs[i]; mutex_lock(&vq->mutex); vq->iotlb = niotlb; __vhost_vq_meta_reset(vq); mutex_unlock(&vq->mutex); } vhost_iotlb_free(oiotlb); return 0; } EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct eventfd_ctx *ctx; u64 p; long r; int i, fd; /* If you are not the owner, you can become one */ if (ioctl == VHOST_SET_OWNER) { r = vhost_dev_set_owner(d); goto done; } #ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL if (ioctl == VHOST_SET_FORK_FROM_OWNER) { /* Only allow modification before owner is set */ if (vhost_dev_has_owner(d)) { r = -EBUSY; goto done; } u8 fork_owner_val; if (get_user(fork_owner_val, (u8 __user *)argp)) { r = -EFAULT; goto done; } if (fork_owner_val != VHOST_FORK_OWNER_TASK && fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { r = -EINVAL; goto done; } d->fork_owner = !!fork_owner_val; r = 0; goto done; } if (ioctl == VHOST_GET_FORK_FROM_OWNER) { u8 fork_owner_val = d->fork_owner; if (fork_owner_val != VHOST_FORK_OWNER_TASK && fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { r = -EINVAL; goto done; } if (put_user(fork_owner_val, (u8 __user *)argp)) { r = -EFAULT; goto done; } r = 0; goto done; } #endif /* You must be the owner to do anything else */ r = vhost_dev_check_owner(d); if (r) goto done; switch (ioctl) { case VHOST_SET_MEM_TABLE: r = vhost_set_memory(d, argp); break; case VHOST_SET_LOG_BASE: if (copy_from_user(&p, argp, sizeof p)) { r = -EFAULT; break; } if ((u64)(unsigned long)p != p) { r = -EFAULT; break; } for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq; void __user *base = (void __user *)(unsigned long)p; vq = d->vqs[i]; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ if (vq->private_data && !vq_log_access_ok(vq, base)) r = -EFAULT; else vq->log_base = base; mutex_unlock(&vq->mutex); } break; case VHOST_SET_LOG_FD: r = get_user(fd, (int __user *)argp); if (r < 0) break; ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; } swap(ctx, d->log_ctx); for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); d->vqs[i]->log_ctx = d->log_ctx; mutex_unlock(&d->vqs[i]->mutex); } if (ctx) eventfd_ctx_put(ctx); break; default: r = -ENOIOCTLCMD; break; } done: return r; } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst. */ static int set_bit_to_user(int nr, void __user *addr) { unsigned long log = (unsigned long)addr; struct page *page; void *base; int bit = nr + (log % PAGE_SIZE) * 8; int r; r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); if (r < 0) return r; BUG_ON(r != 1); base = kmap_atomic(page); set_bit(bit, base); kunmap_atomic(base); unpin_user_pages_dirty_lock(&page, 1, true); return 0; } static int log_write(void __user *log_base, u64 write_address, u64 write_length) { u64 write_page = write_address / VHOST_PAGE_SIZE; int r; if (!write_length) return 0; write_length += write_address % VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; u64 log = base + write_page / 8; int bit = write_page % 8; if ((u64)(unsigned long)log != log) return -EFAULT; r = set_bit_to_user(bit, (void __user *)(unsigned long)log); if (r < 0) return r; if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; write_page += 1; } return r; } static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) { struct vhost_iotlb *umem = vq->umem; struct vhost_iotlb_map *u; u64 start, end, l, min; int r; bool hit = false; while (len) { min = len; /* More than one GPAs can be mapped into a single HVA. So * iterate all possible umems here to be safe. */ list_for_each_entry(u, &umem->list, link) { if (u->addr > hva - 1 + len || u->addr - 1 + u->size < hva) continue; start = max(u->addr, hva); end = min(u->addr - 1 + u->size, hva - 1 + len); l = end - start + 1; r = log_write(vq->log_base, u->start + start - u->addr, l); if (r < 0) return r; hit = true; min = min(l, min); } if (!hit) return -EFAULT; len -= min; hva += min; } return 0; } static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) { struct iovec *iov = vq->log_iov; int i, ret; if (!vq->iotlb) return log_write(vq->log_base, vq->log_addr + used_offset, len); ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, len, iov, 64, VHOST_ACCESS_WO); if (ret < 0) return ret; for (i = 0; i < ret; i++) { ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (ret) return ret; } return 0; } /* * vhost_log_write() - Log in dirty page bitmap * @vq: vhost virtqueue. * @log: Array of dirty memory in GPA. * @log_num: Size of vhost_log arrary. * @len: The total length of memory buffer to log in the dirty bitmap. * Some drivers may only partially use pages shared via the last * vring descriptor (i.e. vhost-net RX buffer). * Use (len == U64_MAX) to indicate the driver would log all * pages of vring descriptors. * @iov: Array of dirty memory in HVA. * @count: Size of iovec array. */ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); if (vq->iotlb) { for (i = 0; i < count; i++) { r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, iov[i].iov_len); if (r < 0) return r; } return 0; } for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); if (r < 0) return r; if (len != U64_MAX) len -= l; } if (vq->log_ctx) eventfd_signal(vq->log_ctx); return 0; } EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; if (vhost_put_used_flags(vq)) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ smp_wmb(); /* Log used flag write. */ used = &vq->used->flags; log_used(vq, (used - (void __user *)vq->used), sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } static int vhost_update_avail_event(struct vhost_virtqueue *vq) { if (vhost_put_avail_event(vq)) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; /* Make sure the event is seen before log. */ smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); log_used(vq, (used - (void __user *)vq->used), sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return 0; } int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; bool is_le = vq->is_le; if (!vq->private_data) return 0; vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) goto err; vq->signalled_used_valid = false; if (!vq->iotlb && !access_ok(&vq->used->idx, sizeof vq->used->idx)) { r = -EFAULT; goto err; } r = vhost_get_used_idx(vq, &last_used_idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); goto err; } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; err: vq->is_le = is_le; return r; } EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size, int access) { const struct vhost_iotlb_map *map; struct vhost_dev *dev = vq->dev; struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0, last = addr + len - 1; int ret = 0; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { if (umem != dev->iotlb) { ret = -EFAULT; break; } ret = -EAGAIN; break; } else if (!(map->perm & access)) { ret = -EPERM; break; } _iov = iov + ret; size = map->size - addr + map->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) (map->addr + addr - map->start); s += size; addr += size; ++ret; } if (ret == -EAGAIN) vhost_iotlb_miss(vq, addr, access); return ret; } /* Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, * or -1U if we're at the end. */ static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) { unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) return -1U; /* Check they're not leading us off end of descriptors. */ next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); return next; } static int get_indirect(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num, struct vring_desc *indirect) { struct vring_desc desc; unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { vq_err(vq, "Invalid length in indirect descriptor: " "len 0x%llx not multiple of 0x%zx\n", (unsigned long long)len, sizeof desc); return -EINVAL; } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len); count = len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ if (unlikely(count > USHRT_MAX + 1)) { vq_err(vq, "Indirect buffer length too big: %d\n", indirect->len); return -E2BIG; } do { unsigned iov_count = *in_num + *out_num; if (unlikely(++found > count)) { vq_err(vq, "Loop detected: last one at %u " "indirect size %u\n", i, count); return -EINVAL; } if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) access = VHOST_ACCESS_WO; else access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count, access); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d indirect idx %d\n", ret, i); return ret; } /* If this is an input descriptor, increment that count. */ if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ if (unlikely(*in_num)) { vq_err(vq, "Indirect descriptor " "has out after in: idx %d\n", i); return -EINVAL; } *out_num += ret; } } while ((i = next_desc(vq, &desc)) != -1); return 0; } /* This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * * This function returns the descriptor number found, or vq->num (which is * never a valid descriptor number) if none was found. A negative code is * returned on error. */ int vhost_get_vq_desc(struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num, struct vhost_log *log, unsigned int *log_num) { bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); struct vring_desc desc; unsigned int i, head, found = 0; u16 last_avail_idx = vq->last_avail_idx; __virtio16 ring_head; int ret, access, c = 0; if (vq->avail_idx == vq->last_avail_idx) { ret = vhost_get_avail_idx(vq); if (unlikely(ret < 0)) return ret; if (!ret) return vq->num; } if (in_order) head = vq->next_avail_head & (vq->num - 1); else { /* Grab the next descriptor number they're * advertising, and increment the index we've seen. */ if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); return -EFAULT; } head = vhost16_to_cpu(vq, ring_head); } /* If their number is silly, that's an error. */ if (unlikely(head >= vq->num)) { vq_err(vq, "Guest says index %u > %u is available", head, vq->num); return -EINVAL; } /* When we start there are none of either input nor output. */ *out_num = *in_num = 0; if (unlikely(log)) *log_num = 0; i = head; do { unsigned iov_count = *in_num + *out_num; if (unlikely(i >= vq->num)) { vq_err(vq, "Desc index is %u > %u, head = %u", i, vq->num, head); return -EINVAL; } if (unlikely(++found > vq->num)) { vq_err(vq, "Loop detected: last one at %u " "vq size %u head %u\n", i, vq->num, head); return -EINVAL; } ret = vhost_get_desc(vq, &desc, i); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); return -EFAULT; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) { ret = get_indirect(vq, iov, iov_size, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Failure detected " "in indirect descriptor at idx %d\n", i); return ret; } ++c; continue; } if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) access = VHOST_ACCESS_WO; else access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, iov_size - iov_count, access); if (unlikely(ret < 0)) { if (ret != -EAGAIN) vq_err(vq, "Translation failure %d descriptor idx %d\n", ret, i); return ret; } if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; } } else { /* If it's an output descriptor, they're all supposed * to come before any input descriptors. */ if (unlikely(*in_num)) { vq_err(vq, "Descriptor has out after in: " "idx %d\n", i); return -EINVAL; } *out_num += ret; } ++c; } while ((i = next_desc(vq, &desc)) != -1); /* On success, increment avail index. */ vq->last_avail_idx++; vq->next_avail_head += c; /* Assume notifications from guest are disabled at this point, * if they aren't we would need to update avail_event index. */ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } EXPORT_SYMBOL_GPL(vhost_get_vq_desc); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { vq->last_avail_idx -= n; } EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); /* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. */ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { struct vring_used_elem heads = { cpu_to_vhost32(vq, head), cpu_to_vhost32(vq, len) }; u16 nheads = 1; return vhost_add_used_n(vq, &heads, &nheads, 1); } EXPORT_SYMBOL_GPL(vhost_add_used); static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { vring_used_elem_t __user *used; u16 old, new; int start; start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (vhost_put_used(vq, heads, start, count)) { vq_err(vq, "Failed to write used"); return -EFAULT; } if (unlikely(vq->log_used)) { /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ log_used(vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); /* If the driver never bothers to signal in a very long while, * used index might wrap around. If that happens, invalidate * signalled_used index we stored. TODO: make sure driver * signals at least once in 2^16 and remove this. */ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) vq->signalled_used_valid = false; return 0; } static int vhost_add_used_n_ooo(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { int start, n, r; start = vq->last_used_idx & (vq->num - 1); n = vq->num - start; if (n < count) { r = __vhost_add_used_n(vq, heads, n); if (r < 0) return r; heads += n; count -= n; } return __vhost_add_used_n(vq, heads, count); } static int vhost_add_used_n_in_order(struct vhost_virtqueue *vq, struct vring_used_elem *heads, const u16 *nheads, unsigned count) { vring_used_elem_t __user *used; u16 old, new = vq->last_used_idx; int start, i; if (!nheads) return -EINVAL; start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; for (i = 0; i < count; i++) { if (vhost_put_used(vq, &heads[i], start, 1)) { vq_err(vq, "Failed to write used"); return -EFAULT; } start += nheads[i]; new += nheads[i]; if (start >= vq->num) start -= vq->num; } if (unlikely(vq->log_used)) { /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ log_used(vq, ((void __user *)used - (void __user *)vq->used), (vq->num - start) * sizeof *used); if (start + count > vq->num) log_used(vq, 0, (start + count - vq->num) * sizeof *used); } old = vq->last_used_idx; vq->last_used_idx = new; /* If the driver never bothers to signal in a very long while, * used index might wrap around. If that happens, invalidate * signalled_used index we stored. TODO: make sure driver * signals at least once in 2^16 and remove this. */ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) vq->signalled_used_valid = false; return 0; } /* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. */ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, u16 *nheads, unsigned count) { bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER); int r; if (!in_order || !nheads) r = vhost_add_used_n_ooo(vq, heads, count); else r = vhost_add_used_n_in_order(vq, heads, nheads, count); if (r < 0) return r; /* Make sure buffer is written before we update index. */ smp_wmb(); if (vhost_put_used_idx(vq)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } if (unlikely(vq->log_used)) { /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ log_used(vq, offsetof(struct vring_used, idx), sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx); } return r; } EXPORT_SYMBOL_GPL(vhost_add_used_n); static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 old, new; __virtio16 event; bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && unlikely(vq->avail_idx == vq->last_avail_idx)) return true; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; if (vhost_get_avail_flags(vq, &flags)) { vq_err(vq, "Failed to get flags"); return true; } return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); } old = vq->signalled_used; v = vq->signalled_used_valid; new = vq->signalled_used = vq->last_used_idx; vq->signalled_used_valid = true; if (unlikely(!v)) return true; if (vhost_get_used_event(vq, &event)) { vq_err(vq, "Failed to get used event idx"); return true; } return vring_need_event(vhost16_to_cpu(vq, event), new, old); } /* This actually signals the guest, using eventfd. */ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ if (vq->call_ctx.ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx.ctx); } EXPORT_SYMBOL_GPL(vhost_signal); /* And here's the combo meal deal. Supersize me! */ void vhost_add_used_and_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned int head, int len) { vhost_add_used(vq, head, len); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); /* multi-buffer version of vhost_add_used_and_signal */ void vhost_add_used_and_signal_n(struct vhost_dev *dev, struct vhost_virtqueue *vq, struct vring_used_elem *heads, u16 *nheads, unsigned count) { vhost_add_used_n(vq, heads, nheads, count); vhost_signal(dev, vq); } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); /* return true if we're sure that available ring is empty */ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->avail_idx != vq->last_avail_idx) return false; r = vhost_get_avail_idx(vq); /* Note: we treat error as non-empty here */ return r == 0; } EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) { vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); return false; } } else { r = vhost_update_avail_event(vq); if (r) { vq_err(vq, "Failed to update avail event index at %p: %d\n", vhost_avail_event(vq), r); return false; } } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); r = vhost_get_avail_idx(vq); /* Note: we treat error as empty here */ if (unlikely(r < 0)) return false; return r; } EXPORT_SYMBOL_GPL(vhost_enable_notify); /* We don't need to be notified again. */ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) vq_err(vq, "Failed to disable notification at %p: %d\n", &vq->used->flags, r); } } EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { /* Make sure all padding within the structure is initialized. */ struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; node->vq = vq; node->msg.type = type; return node; } EXPORT_SYMBOL_GPL(vhost_new_msg); void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, struct vhost_msg_node *node) { spin_lock(&dev->iotlb_lock); list_add_tail(&node->node, head); spin_unlock(&dev->iotlb_lock); wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); } EXPORT_SYMBOL_GPL(vhost_enqueue_msg); struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, struct list_head *head) { struct vhost_msg_node *node = NULL; spin_lock(&dev->iotlb_lock); if (!list_empty(head)) { node = list_first_entry(head, struct vhost_msg_node, node); list_del(&node->node); } spin_unlock(&dev->iotlb_lock); return node; } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); void vhost_set_backend_features(struct vhost_dev *dev, u64 features) { struct vhost_virtqueue *vq; int i; mutex_lock(&dev->mutex); for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; mutex_lock(&vq->mutex); vq->acked_backend_features = features; mutex_unlock(&vq->mutex); } mutex_unlock(&dev->mutex); } EXPORT_SYMBOL_GPL(vhost_set_backend_features); static int __init vhost_init(void) { return 0; } static void __exit vhost_exit(void) { } module_init(vhost_init); module_exit(vhost_exit); MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. Tsirkin"); MODULE_DESCRIPTION("Host kernel accelerator for virtio"); |
| 5075 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | #undef TRACE_SYSTEM #define TRACE_SYSTEM netlink #if !defined(_TRACE_NETLINK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NETLINK_H #include <linux/tracepoint.h> TRACE_EVENT(netlink_extack, TP_PROTO(const char *msg), TP_ARGS(msg), TP_STRUCT__entry( __string( msg, msg ) ), TP_fast_assign( __assign_str(msg); ), TP_printk("msg=%s", __get_str(msg)) ); #endif /* _TRACE_NETLINK_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 726 728 4 6 14 14 13 7 6 15 15 14 8 7 11 7 5 8 5 2 6 6 11 11 645 647 646 3 3 3 3 3 3 646 2010 2010 647 7 5 3 16 3 13 13 12 5 6 6 6 5 5 1 4 1 2 4 4 8 1 5 5 7 25 3 1 21 20 18 2 2 4 2 7 2 13 2 8 3 17 1 1 1 1 2 1 1 2 2 1 6 4 2 2 1 37 11 2 35 35 34 3 3 3 3 3 5 1 4 2 1 50 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 | // SPDX-License-Identifier: GPL-2.0-or-later /** -*- linux-c -*- *********************************************************** * Linux PPP over Ethernet (PPPoX/PPPoE) Sockets * * PPPoX --- Generic PPP encapsulation socket family * PPPoE --- PPP over Ethernet (RFC 2516) * * Version: 0.7.0 * * 070228 : Fix to allow multiple sessions with same remote MAC and same * session id by including the local device ifindex in the * tuple identifying a session. This also ensures packets can't * be injected into a session from interfaces other than the one * specified by userspace. Florian Zumbiehl <florz@florz.de> * (Oh, BTW, this one is YYMMDD, in case you were wondering ...) * 220102 : Fix module use count on failure in pppoe_create, pppox_sk -acme * 030700 : Fixed connect logic to allow for disconnect. * 270700 : Fixed potential SMP problems; we must protect against * simultaneous invocation of ppp_input * and ppp_unregister_channel. * 040800 : Respect reference count mechanisms on net-devices. * 200800 : fix kfree(skb) in pppoe_rcv (acme) * Module reference count is decremented in the right spot now, * guards against sock_put not actually freeing the sk * in pppoe_release. * 051000 : Initialization cleanup. * 111100 : Fix recvmsg. * 050101 : Fix PADT processing. * 140501 : Use pppoe_rcv_core to handle all backlog. (Alexey) * 170701 : Do not lock_sock with rwlock held. (DaveM) * Ignore discovery frames if user has socket * locked. (DaveM) * Ignore return value of dev_queue_xmit in __pppoe_xmit * or else we may kfree an SKB twice. (DaveM) * 190701 : When doing copies of skb's in __pppoe_xmit, always delete * the original skb that was passed in on success, never on * failure. Delete the copy of the skb on failure to avoid * a memory leak. * 081001 : Misc. cleanup (licence string, non-blocking, prevent * reference of device on close). * 121301 : New ppp channels interface; cannot unregister a channel * from interrupts. Thus, we mark the socket as a ZOMBIE * and do the unregistration later. * 081002 : seq_file support for proc stuff -acme * 111602 : Merge all 2.4 fixes into 2.5/2.6 tree. Label 2.5/2.6 * as version 0.7. Spacing cleanup. * Author: Michal Ostrowski <mostrows@speakeasy.net> * Contributors: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * David S. Miller (davem@redhat.com) * * License: */ #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/if_pppox.h> #include <linux/ppp_channel.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/notifier.h> #include <linux/file.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <linux/uaccess.h> #define PPPOE_HASH_BITS CONFIG_PPPOE_HASH_BITS #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS) #define PPPOE_HASH_MASK (PPPOE_HASH_SIZE - 1) static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); static const struct proto_ops pppoe_ops; static const struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ static unsigned int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all * nets by injecting net id into the hash but * it would increase hash chains and add * a few additional math comparisons messy * as well, moreover in case of SMP less locking * controversy here */ struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; spinlock_t hash_lock; }; /* * PPPoE could be in the following stages: * 1) Discovery stage (to obtain remote MAC and Session ID) * 2) Session stage (MAC and SID are known) * * Ethernet frames have a special tag for this but * we use simpler approach based on session id */ static inline bool stage_session(__be16 sid) { return sid != 0; } static inline struct pppoe_net *pppoe_pernet(struct net *net) { return net_generic(net, pppoe_net_id); } static inline int cmp_2_addr(struct pppoe_addr *a, struct pppoe_addr *b) { return a->sid == b->sid && ether_addr_equal(a->remote, b->remote); } static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, char *addr) { return a->sid == sid && ether_addr_equal(a->remote, addr); } #if 8 % PPPOE_HASH_BITS #error 8 must be a multiple of PPPOE_HASH_BITS #endif static int hash_item(__be16 sid, unsigned char *addr) { unsigned char hash = 0; unsigned int i; for (i = 0; i < ETH_ALEN; i++) hash ^= addr[i]; for (i = 0; i < sizeof(sid_t) * 8; i += 8) hash ^= (__force __u32)sid >> i; for (i = 8; (i >>= 1) >= PPPOE_HASH_BITS;) hash ^= hash >> i; return hash & PPPOE_HASH_MASK; } /********************************************************************** * * Set/get/delete/rehash items (internal versions) * **********************************************************************/ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret; ret = rcu_dereference(pn->hash_table[hash]); while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; ret = rcu_dereference(ret->next); } return NULL; } static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); struct pppox_sock *ret, *first; first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); ret = first; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } RCU_INIT_POINTER(po->next, first); rcu_assign_pointer(pn->hash_table[hash], po); return 0; } static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret, __rcu **src; ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { struct pppox_sock *next; next = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); rcu_assign_pointer(*src, next); break; } src = &ret->next; ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } } /********************************************************************** * * Set/get/delete/rehash items * **********************************************************************/ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { struct pppox_sock *po; po = __get_item(pn, sid, addr, ifindex); if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) po = NULL; return po; } static inline struct pppox_sock *__get_item_by_addr(struct net *net, struct sockaddr_pppox *sp) { struct net_device *dev; struct pppoe_net *pn; struct pppox_sock *pppox_sock = NULL; int ifindex; dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); if (dev) { ifindex = dev->ifindex; pn = pppoe_pernet(net); pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } return pppox_sock; } static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { spin_lock(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); spin_unlock(&pn->hash_lock); } /*************************************************************************** * * Handler for device events. * Certain device events require that sockets be unconnected. * **************************************************************************/ static void pppoe_flush_dev(struct net_device *dev) { struct pppoe_net *pn; int i; pn = pppoe_pernet(dev_net(dev)); spin_lock(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i], lockdep_is_held(&pn->hash_lock)); struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { po = rcu_dereference_protected(po->next, lockdep_is_held(&pn->hash_lock)); } if (!po) break; sk = sk_pppox(po); /* We always grab the socket lock, followed by the * hash_lock, in that order. Since we should hold the * sock lock while doing any unbinding, we need to * release the lock we're holding. Hold a reference to * the sock so it doesn't disappear as we're jumping * between locks. */ sock_hold(sk); spin_unlock(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); } release_sock(sk); sock_put(sk); /* Restart the process from the start of the current * hash chain. We dropped locks so the world may have * change from underneath us. */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); spin_lock(&pn->hash_lock); po = rcu_dereference_protected(pn->hash_table[i], lockdep_is_held(&pn->hash_lock)); } } spin_unlock(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEADDR: case NETDEV_CHANGEMTU: /* A change in mtu or address is a bad thing, requiring * LCP re-negotiation. */ case NETDEV_GOING_DOWN: case NETDEV_DOWN: /* Find every socket on this device and kill it. */ pppoe_flush_dev(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block pppoe_notifier = { .notifier_call = pppoe_device_event, }; /************************************************************************ * * Do the real work of receiving a PPPoE Session frame. * ***********************************************************************/ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *relay_po; /* Backlog receive. Semantics of backlog rcv preclude any code from * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state * can't change. */ if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) goto abort_kfree; if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_kfree; } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; } return NET_RX_SUCCESS; abort_kfree: kfree_skb(skb); return NET_RX_DROP; } /************************************************************************ * * Receive wrapper called in BH context. * ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; int len; if (skb->pkt_type == PACKET_OTHERHOST) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb_mac_header_len(skb) < ETH_HLEN) goto drop; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; ph = pppoe_hdr(skb); len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(*ph)); if (skb->len < len) goto drop; if (pskb_trim_rcsum(skb, len)) goto drop; ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); po = __get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false); drop: kfree_skb(skb); out: return NET_RX_DROP; } static void pppoe_unbind_sock_work(struct work_struct *work) { struct pppox_sock *po = container_of(work, struct pppox_sock, proto.pppoe.padt_work); struct sock *sk = sk_pppox(po); lock_sock(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); release_sock(sk); sock_put(sk); } /************************************************************************ * * Receive a PPPoE Discovery frame. * This is solely for detection of PADT frames * ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb->pkt_type != PACKET_HOST) goto abort; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; pn = pppoe_pernet(dev_net(dev)); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) sock_put(sk_pppox(po)); abort: kfree_skb(skb); out: return NET_RX_SUCCESS; /* Lies... :-) */ } static struct packet_type pppoes_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_SES), .func = pppoe_rcv, }; static struct packet_type pppoed_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_DISC), .func = pppoe_disc_rcv, }; static struct proto pppoe_sk_proto __read_mostly = { .name = "PPPOE", .owner = THIS_MODULE, .obj_size = sizeof(struct pppox_sock), }; static void pppoe_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } /*********************************************************************** * * Initialize a new struct sock. * **********************************************************************/ static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); sock_set_flag(sk, SOCK_RCU_FREE); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; sk->sk_destruct = pppoe_destruct; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, pppoe_unbind_sock_work); return 0; } static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; struct pppoe_net *pn; struct net *net = NULL; if (!sk) return 0; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { release_sock(sk); return -EBADF; } po = pppox_sk(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; net = sock_net(sk); pn = pppoe_pernet(net); /* * protect "po" from concurrent updates * on pppoe_flush_dev */ delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); sock_orphan(sk); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = NULL; struct pppoe_net *pn; struct net *net = NULL; int error; lock_sock(sk); error = -EINVAL; if (sockaddr_len != sizeof(struct sockaddr_pppox)) goto end; if (sp->sa_protocol != PX_PROTO_OE) goto end; /* Check for already bound sockets */ error = -EBUSY; if ((sk->sk_state & PPPOX_CONNECTED) && stage_session(sp->sa_addr.pppoe.sid)) goto end; /* Check for already disconnected sockets, on attempts to disconnect */ error = -EALREADY; if ((sk->sk_state & PPPOX_DEAD) && !stage_session(sp->sa_addr.pppoe.sid)) goto end; error = 0; /* Delete the old binding */ if (stage_session(po->pppoe_pa.sid)) { pppox_unbind_sock(sk); pn = pppoe_pernet(sock_net(sk)); delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } po->pppoe_ifindex = 0; memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); memset(&po->chan, 0, sizeof(po->chan)); po->next = NULL; po->num = 0; sk->sk_state = PPPOX_NONE; } /* Re-bind in session stage only */ if (stage_session(sp->sa_addr.pppoe.sid)) { error = -ENODEV; net = sock_net(sk); dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); if (!dev) goto err_put; po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; pn = pppoe_pernet(net); if (!(dev->flags & IFF_UP)) { goto err_put; } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); spin_lock(&pn->hash_lock); error = __set_item(pn, po); spin_unlock(&pn->hash_lock); if (error < 0) goto err_put; po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; po->chan.direct_xmit = true; error = ppp_register_net_channel(dev_net(dev), &po->chan); if (error) { delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); goto err_put; } sk->sk_state = PPPOX_CONNECTED; } po->num = sp->sa_addr.pppoe.sid; end: release_sock(sk); return error; err_put: if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } goto end; } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_OE; memcpy(&sp.sa_addr.pppoe, &pppox_sk(sock->sk)->pppoe_pa, sizeof(struct pppoe_addr)); memcpy(uaddr, &sp, len); return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int val; int err; switch (cmd) { case PPPIOCGMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (put_user(po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN, (int __user *)arg)) break; err = 0; break; case PPPIOCSMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (get_user(val, (int __user *)arg)) break; if (val < (po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN)) err = 0; else err = -EINVAL; break; case PPPIOCSFLAGS: err = -EFAULT; if (get_user(val, (int __user *)arg)) break; err = 0; break; case PPPOEIOCSFWD: { struct pppox_sock *relay_po; err = -EBUSY; if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; if (!(sk->sk_state & PPPOX_CONNECTED)) break; /* PPPoE address from the user specifies an outbound PPPoE address which frames are forwarded to */ err = -EFAULT; if (copy_from_user(&po->pppoe_relay, (void __user *)arg, sizeof(struct sockaddr_pppox))) break; err = -EINVAL; if (po->pppoe_relay.sa_family != AF_PPPOX || po->pppoe_relay.sa_protocol != PX_PROTO_OE) break; /* Check that the socket referenced by the address actually exists. */ rcu_read_lock(); relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); rcu_read_unlock(); if (!relay_po) break; sk->sk_state |= PPPOX_RELAY; err = 0; break; } case PPPOEIOCDFWD: err = -EALREADY; if (!(sk->sk_state & PPPOX_RELAY)) break; sk->sk_state &= ~PPPOX_RELAY; err = 0; break; default: err = -ENOTTY; } return err; } static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; char *start; int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { error = -ENOTCONN; goto end; } hdr.ver = 1; hdr.type = 1; hdr.code = 0; hdr.sid = po->num; dev = po->pppoe_dev; error = -EMSGSIZE; if (total_len > (dev->mtu + dev->hard_header_len)) goto end; hlen = LL_RESERVED_SPACE(dev); skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); start = (char *)&ph->tag[0]; error = memcpy_from_msg(start, m, total_len); if (error < 0) { kfree_skb(skb); goto end; } error = total_len; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, total_len); memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); ph->length = htons(total_len); dev_queue_xmit(skb); end: release_sock(sk); return error; } /************************************************************************ * * xmit function for internal use. * ***********************************************************************/ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; struct pppoe_hdr *ph; int data_len = skb->len; /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP * xmit operations conclude prior to an unregistration call. Thus * sk->sk_state cannot change, so we don't need to do lock_sock(). * But, we also can't do a lock_sock since that introduces a potential * deadlock as we'd reverse the lock ordering used when calling * ppp_unregister_channel(). */ if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; if (!dev) goto abort; /* Copy the data if there is no space for the header or if it's * read-only. */ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); skb_reset_network_header(skb); ph = pppoe_hdr(skb); ph->ver = 1; ph->type = 1; ph->code = 0; ph->sid = po->num; ph->length = htons(data_len); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); skb->dev = dev; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); dev_queue_xmit(skb); return 1; abort: kfree_skb(skb); return 1; } /************************************************************************ * * xmit function called by generic PPP driver * sends PPP frame over PPPoE socket * ***********************************************************************/ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = chan->private; return __pppoe_xmit(sk, skb); } static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path, const struct ppp_channel *chan) { struct sock *sk = chan->private; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED) || !dev) return -1; path->type = DEV_PATH_PPPOE; path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; return 0; } static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, .fill_forward_path = pppoe_fill_forward_path, }; static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; if (sk->sk_state & PPPOX_BOUND) return -EIO; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_msg(skb, 0, m, total_len); if (error == 0) { consume_skb(skb); return total_len; } kfree_skb(skb); return error; } #ifdef CONFIG_PROC_FS static int pppoe_seq_show(struct seq_file *seq, void *v) { struct pppox_sock *po; char *dev_name; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Id Address Device\n"); goto out; } po = v; dev_name = po->pppoe_pa.dev; seq_printf(seq, "%08X %pM %8s\n", po->pppoe_pa.sid, po->pppoe_pa.remote, dev_name); out: return 0; } static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) { struct pppox_sock *po; int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { po = rcu_dereference(pn->hash_table[i]); while (po) { if (!pos--) goto out; po = rcu_dereference(po->next); } } out: return po; } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; rcu_read_lock(); return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); struct pppox_sock *po, *next; ++*pos; if (v == SEQ_START_TOKEN) { po = pppoe_get_idx(pn, 0); goto out; } po = v; next = rcu_dereference(po->next); if (next) po = next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = rcu_dereference(pn->hash_table[hash]); if (po) break; } } out: return po; } static void pppoe_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static const struct seq_operations pppoe_seq_ops = { .start = pppoe_seq_start, .next = pppoe_seq_next, .stop = pppoe_seq_stop, .show = pppoe_seq_show, }; #endif /* CONFIG_PROC_FS */ static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, .bind = sock_no_bind, .connect = pppoe_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = pppox_compat_ioctl, #endif }; static const struct pppox_proto pppoe_proto = { .create = pppoe_create, .ioctl = pppoe_ioctl, .owner = THIS_MODULE, }; static __net_init int pppoe_init_net(struct net *net) { struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; spin_lock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); #ifdef CONFIG_PROC_FS if (!pde) return -ENOMEM; #endif return 0; } static __net_exit void pppoe_exit_net(struct net *net) { remove_proc_entry("pppoe", net->proc_net); } static struct pernet_operations pppoe_net_ops = { .init = pppoe_init_net, .exit = pppoe_exit_net, .id = &pppoe_net_id, .size = sizeof(struct pppoe_net), }; static int __init pppoe_init(void) { int err; err = register_pernet_device(&pppoe_net_ops); if (err) goto out; err = proto_register(&pppoe_sk_proto, 0); if (err) goto out_unregister_net_ops; err = register_pppox_proto(PX_PROTO_OE, &pppoe_proto); if (err) goto out_unregister_pppoe_proto; dev_add_pack(&pppoes_ptype); dev_add_pack(&pppoed_ptype); register_netdevice_notifier(&pppoe_notifier); return 0; out_unregister_pppoe_proto: proto_unregister(&pppoe_sk_proto); out_unregister_net_ops: unregister_pernet_device(&pppoe_net_ops); out: return err; } static void __exit pppoe_exit(void) { unregister_netdevice_notifier(&pppoe_notifier); dev_remove_pack(&pppoed_ptype); dev_remove_pack(&pppoes_ptype); unregister_pppox_proto(PX_PROTO_OE); proto_unregister(&pppoe_sk_proto); unregister_pernet_device(&pppoe_net_ops); } module_init(pppoe_init); module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE); |
| 1795 6 1043 1041 1 1 35 6 27 2 1 1 49 2 1 46 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | // SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction attribute tables * * Written by Masami Hiramatsu <mhiramat@redhat.com> */ #include <asm/insn.h> /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" /* Attribute search APIs */ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) { return inat_primary_table[opcode]; } int inat_get_last_prefix_id(insn_byte_t last_pfx) { insn_attr_t lpfx_attr; lpfx_attr = inat_get_opcode_attribute(last_pfx); return inat_last_prefix_id(lpfx_attr); } insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr) { const insn_attr_t *table; int n; n = inat_escape_id(esc_attr); table = inat_escape_tables[n][0]; if (!table) return 0; if (inat_has_variant(table[opcode]) && lpfx_id) { table = inat_escape_tables[n][lpfx_id]; if (!table) return 0; } return table[opcode]; } insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr) { const insn_attr_t *table; int n; n = inat_group_id(grp_attr); table = inat_group_tables[n][0]; if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { table = inat_group_tables[n][lpfx_id]; if (!table) return inat_group_common_attribute(grp_attr); } return table[X86_MODRM_REG(modrm)] | inat_group_common_attribute(grp_attr); } insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_p) { const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; /* At first, this checks the master table */ table = inat_avx_tables[vex_m][0]; if (!table) return 0; if (!inat_is_group(table[opcode]) && vex_p) { /* If this is not a group, get attribute directly */ table = inat_avx_tables[vex_m][vex_p]; if (!table) return 0; } return table[opcode]; } insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, insn_byte_t map_select) { const insn_attr_t *table; if (map_select < X86_XOP_M_MIN || map_select > X86_XOP_M_MAX) return 0; map_select -= X86_XOP_M_MIN; /* At first, this checks the master table */ table = inat_xop_tables[map_select]; if (!table) return 0; return table[opcode]; } |
| 10 10 10 10 10 10 11 11 1 1 1 1 1 1 1 1 1 1 1 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005-2006 Micronas USA Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/usb.h> #include <linux/i2c.h> #include <asm/byteorder.h> #include <media/i2c/saa7115.h> #include <media/tuner.h> #include <media/i2c/uda1342.h> #include "go7007-priv.h" static unsigned int assume_endura; module_param(assume_endura, int, 0644); MODULE_PARM_DESC(assume_endura, "when probing fails, hardware is a Pelco Endura"); /* #define GO7007_I2C_DEBUG */ /* for debugging the EZ-USB I2C adapter */ #define HPI_STATUS_ADDR 0xFFF4 #define INT_PARAM_ADDR 0xFFF6 #define INT_INDEX_ADDR 0xFFF8 /* * Pipes on EZ-USB interface: * 0 snd - Control * 0 rcv - Control * 2 snd - Download firmware (control) * 4 rcv - Read Interrupt (interrupt) * 6 rcv - Read Video (bulk) * 8 rcv - Read Audio (bulk) */ #define GO7007_USB_EZUSB (1<<0) #define GO7007_USB_EZUSB_I2C (1<<1) struct go7007_usb_board { unsigned int flags; struct go7007_board_info main_info; }; struct go7007_usb { const struct go7007_usb_board *board; struct mutex i2c_lock; struct usb_device *usbdev; struct urb *video_urbs[8]; struct urb *audio_urbs[8]; struct urb *intr_urb; }; /*********************** Product specification data ***********************/ static const struct go7007_usb_board board_matrix_ii = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 9, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_matrix_reload = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7113", .addr = 0x25, .is_video = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 9, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_star_trek = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO, /* | GO7007_BOARD_HAS_TUNER, */ .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .num_i2c_devs = 1, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, }, .num_inputs = 2, .inputs = { /* { * .video_input = 3, * .audio_index = AUDIO_TUNER, * .name = "Tuner", * }, */ { .video_input = 1, /* .audio_index = AUDIO_EXTERN, */ .name = "Composite", }, { .video_input = 8, /* .audio_index = AUDIO_EXTERN, */ .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, }, }; static const struct go7007_usb_board board_px_tv402u = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_HAS_TUNER, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_SAA7115 | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .num_i2c_devs = 5, .i2c_devs = { { .type = "saa7115", .addr = 0x20, .is_video = 1, }, { .type = "uda1342", .addr = 0x1a, .is_audio = 1, }, { .type = "tuner", .addr = 0x60, }, { .type = "tuner", .addr = 0x43, }, { .type = "sony-btf-mpx", .addr = 0x44, }, }, .num_inputs = 3, .inputs = { { .video_input = 3, .audio_index = 0, .name = "Tuner", }, { .video_input = 1, .audio_index = 1, .name = "Composite", }, { .video_input = 8, .audio_index = 1, .name = "S-Video", }, }, .video_config = SAA7115_IDQ_IS_DEFAULT, .num_aud_inputs = 2, .aud_inputs = { { .audio_input = UDA1342_IN2, .name = "Tuner", }, { .audio_input = UDA1342_IN1, .name = "Line In", }, }, }, }; static const struct go7007_usb_board board_xmen = { .flags = 0, .main_info = { .flags = GO7007_BOARD_USE_ONBOARD_I2C, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_VREF_POLAR, .sensor_width = 320, .sensor_height = 240, .sensor_framerate = 30030, .audio_flags = GO7007_AUDIO_ONE_CHANNEL | GO7007_AUDIO_I2S_MODE_3 | GO7007_AUDIO_WORD_14 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_BCLK_POLAR | GO7007_AUDIO_OKI_MODE, .audio_rate = 8000, .audio_bclk_div = 48, .audio_main_div = 1, .num_i2c_devs = 1, .i2c_devs = { { .type = "ov7640", .addr = 0x21, }, }, .num_inputs = 1, .inputs = { { .name = "Camera", }, }, }, }; static const struct go7007_usb_board board_matrix_revolution = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw9903", .is_video = 1, .addr = 0x44, }, }, .num_inputs = 2, .inputs = { { .video_input = 2, .name = "Composite", }, { .video_input = 8, .name = "S-Video", }, }, }, }; #if 0 static const struct go7007_usb_board board_lifeview_lr192 = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_VALID_ENABLE | GO7007_SENSOR_TV | GO7007_SENSOR_VBI | GO7007_SENSOR_SCALING, .num_i2c_devs = 0, .num_inputs = 1, .inputs = { { .video_input = 0, .name = "Composite", }, }, }, }; #endif static const struct go7007_usb_board board_endura = { .flags = 0, .main_info = { .flags = 0, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 8000, .audio_bclk_div = 48, .audio_main_div = 8, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .sensor_h_offset = 8, .num_i2c_devs = 0, .num_inputs = 1, .inputs = { { .name = "Camera", }, }, }, }; static const struct go7007_usb_board board_adlink_mpg24 = { .flags = 0, .main_info = { .flags = GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 0, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw2804", .addr = 0x00, /* yes, really */ .flags = I2C_CLIENT_TEN, .is_video = 1, }, }, .num_inputs = 1, .inputs = { { .name = "Composite", }, }, }, }; static const struct go7007_usb_board board_sensoray_2250 = { .flags = GO7007_USB_EZUSB | GO7007_USB_EZUSB_I2C, .main_info = { .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .flags = GO7007_BOARD_HAS_AUDIO, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV, .num_i2c_devs = 1, .i2c_devs = { { .type = "s2250", .addr = 0x43, .is_video = 1, .is_audio = 1, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 1, .name = "S-Video", }, }, .num_aud_inputs = 3, .aud_inputs = { { .audio_input = 0, .name = "Line In", }, { .audio_input = 1, .name = "Mic", }, { .audio_input = 2, .name = "Mic Boost", }, }, }, }; static const struct go7007_usb_board board_ads_usbav_709 = { .flags = GO7007_USB_EZUSB, .main_info = { .flags = GO7007_BOARD_HAS_AUDIO | GO7007_BOARD_USE_ONBOARD_I2C, .audio_flags = GO7007_AUDIO_I2S_MODE_1 | GO7007_AUDIO_I2S_MASTER | GO7007_AUDIO_WORD_16, .audio_rate = 48000, .audio_bclk_div = 8, .audio_main_div = 2, .hpi_buffer_cap = 7, .sensor_flags = GO7007_SENSOR_656 | GO7007_SENSOR_TV | GO7007_SENSOR_VBI, .num_i2c_devs = 1, .i2c_devs = { { .type = "tw9906", .is_video = 1, .addr = 0x44, }, }, .num_inputs = 2, .inputs = { { .video_input = 0, .name = "Composite", }, { .video_input = 10, .name = "S-Video", }, }, }, }; static const struct usb_device_id go7007_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x200, /* Revision number of XMen */ .bcdDevice_hi = 0x200, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x202, /* Revision number of Matrix II */ .bcdDevice_hi = 0x202, .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_II, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x204, /* Revision number of Matrix */ .bcdDevice_hi = 0x204, /* Reloaded */ .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_RELOAD, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x205, /* Revision number of XMen-II */ .bcdDevice_hi = 0x205, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN_II, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x208, /* Revision number of Star Trek */ .bcdDevice_hi = 0x208, .driver_info = (kernel_ulong_t)GO7007_BOARDID_STAR_TREK, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x209, /* Revision number of XMen-III */ .bcdDevice_hi = 0x209, .bInterfaceClass = 255, .bInterfaceSubClass = 0, .bInterfaceProtocol = 255, .driver_info = (kernel_ulong_t)GO7007_BOARDID_XMEN_III, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x0eb1, /* Vendor ID of WIS Technologies */ .idProduct = 0x7007, /* Product ID of GO7007SB chip */ .bcdDevice_lo = 0x210, /* Revision number of Matrix */ .bcdDevice_hi = 0x210, /* Revolution */ .driver_info = (kernel_ulong_t)GO7007_BOARDID_MATRIX_REV, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x093b, /* Vendor ID of Plextor */ .idProduct = 0xa102, /* Product ID of M402U */ .bcdDevice_lo = 0x1, /* revision number of Blueberry */ .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_PX_M402U, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x093b, /* Vendor ID of Plextor */ .idProduct = 0xa104, /* Product ID of TV402U */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_PX_TV402U, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x10fd, /* Vendor ID of Anubis Electronics */ .idProduct = 0xde00, /* Product ID of Lifeview LR192 */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_LIFEVIEW_LR192, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x1943, /* Vendor ID Sensoray */ .idProduct = 0x2250, /* Product ID of 2250/2251 */ .bcdDevice_lo = 0x1, .bcdDevice_hi = 0x1, .driver_info = (kernel_ulong_t)GO7007_BOARDID_SENSORAY_2250, }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, .idVendor = 0x06e1, /* Vendor ID of ADS Technologies */ .idProduct = 0x0709, /* Product ID of DVD Xpress DX2 */ .bcdDevice_lo = 0x204, .bcdDevice_hi = 0x204, .driver_info = (kernel_ulong_t)GO7007_BOARDID_ADS_USBAV_709, }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, go7007_usb_id_table); /********************* Driver for EZ-USB HPI interface *********************/ static int go7007_usb_vendor_request(struct go7007 *go, int request, int value, int index, void *transfer_buffer, int length, int in) { struct go7007_usb *usb = go->hpi_context; int timeout = 5000; if (in) { return usb_control_msg(usb->usbdev, usb_rcvctrlpipe(usb->usbdev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, transfer_buffer, length, timeout); } else { return usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, transfer_buffer, length, timeout); } } static int go7007_usb_interface_reset(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; u16 intr_val, intr_data; if (go->status == STATUS_SHUTDOWN) return -1; /* Reset encoder */ if (go7007_write_interrupt(go, 0x0001, 0x0001) < 0) return -1; msleep(100); if (usb->board->flags & GO7007_USB_EZUSB) { /* Reset buffer in EZ-USB */ pr_debug("resetting EZ-USB buffers\n"); if (go7007_usb_vendor_request(go, 0x10, 0, 0, NULL, 0, 0) < 0 || go7007_usb_vendor_request(go, 0x10, 0, 0, NULL, 0, 0) < 0) return -1; /* Reset encoder again */ if (go7007_write_interrupt(go, 0x0001, 0x0001) < 0) return -1; msleep(100); } /* Wait for an interrupt to indicate successful hardware reset */ if (go7007_read_interrupt(go, &intr_val, &intr_data) < 0 || (intr_val & ~0x1) != 0x55aa) { dev_err(go->dev, "unable to reset the USB interface\n"); return -1; } return 0; } static int go7007_usb_ezusb_write_interrupt(struct go7007 *go, int addr, int data) { struct go7007_usb *usb = go->hpi_context; int i, r; u16 status_reg = 0; int timeout = 500; pr_debug("WriteInterrupt: %04x %04x\n", addr, data); for (i = 0; i < 100; ++i) { r = usb_control_msg(usb->usbdev, usb_rcvctrlpipe(usb->usbdev, 0), 0x14, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0, HPI_STATUS_ADDR, go->usb_buf, sizeof(status_reg), timeout); if (r < 0) break; status_reg = le16_to_cpu(*((__le16 *)go->usb_buf)); if (!(status_reg & 0x0010)) break; msleep(10); } if (r < 0) goto write_int_error; if (i == 100) { dev_err(go->dev, "device is hung, status reg = 0x%04x\n", status_reg); return -1; } r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), 0x12, USB_TYPE_VENDOR | USB_RECIP_DEVICE, data, INT_PARAM_ADDR, NULL, 0, timeout); if (r < 0) goto write_int_error; r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 0), 0x12, USB_TYPE_VENDOR | USB_RECIP_DEVICE, addr, INT_INDEX_ADDR, NULL, 0, timeout); if (r < 0) goto write_int_error; return 0; write_int_error: dev_err(go->dev, "error in WriteInterrupt: %d\n", r); return r; } static int go7007_usb_onboard_write_interrupt(struct go7007 *go, int addr, int data) { struct go7007_usb *usb = go->hpi_context; int r; int timeout = 500; pr_debug("WriteInterrupt: %04x %04x\n", addr, data); go->usb_buf[0] = data & 0xff; go->usb_buf[1] = data >> 8; go->usb_buf[2] = addr & 0xff; go->usb_buf[3] = addr >> 8; go->usb_buf[4] = go->usb_buf[5] = go->usb_buf[6] = go->usb_buf[7] = 0; r = usb_control_msg(usb->usbdev, usb_sndctrlpipe(usb->usbdev, 2), 0x00, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT, 0x55aa, 0xf0f0, go->usb_buf, 8, timeout); if (r < 0) { dev_err(go->dev, "error in WriteInterrupt: %d\n", r); return r; } return 0; } static void go7007_usb_readinterrupt_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; __le16 *regs = (__le16 *)urb->transfer_buffer; int status = urb->status; if (status) { if (status != -ESHUTDOWN && go->status != STATUS_SHUTDOWN) { dev_err(go->dev, "error in read interrupt: %d\n", urb->status); } else { wake_up(&go->interrupt_waitq); return; } } else if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in interrupt pipe!\n"); } else { go->interrupt_available = 1; go->interrupt_data = __le16_to_cpu(regs[0]); go->interrupt_value = __le16_to_cpu(regs[1]); pr_debug("ReadInterrupt: %04x %04x\n", go->interrupt_value, go->interrupt_data); } wake_up(&go->interrupt_waitq); } static int go7007_usb_read_interrupt(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int r; r = usb_submit_urb(usb->intr_urb, GFP_KERNEL); if (r < 0) { dev_err(go->dev, "unable to submit interrupt urb: %d\n", r); return r; } return 0; } static void go7007_usb_read_video_pipe_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; int r, status = urb->status; if (!vb2_is_streaming(&go->vidq)) { wake_up_interruptible(&go->frame_waitq); return; } if (status) { dev_err(go->dev, "error in video pipe: %d\n", status); return; } if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in video pipe!\n"); return; } go7007_parse_video_stream(go, urb->transfer_buffer, urb->actual_length); r = usb_submit_urb(urb, GFP_ATOMIC); if (r < 0) dev_err(go->dev, "error in video pipe: %d\n", r); } static void go7007_usb_read_audio_pipe_complete(struct urb *urb) { struct go7007 *go = (struct go7007 *)urb->context; int r, status = urb->status; if (!vb2_is_streaming(&go->vidq)) return; if (status) { dev_err(go->dev, "error in audio pipe: %d\n", status); return; } if (urb->actual_length != urb->transfer_buffer_length) { dev_err(go->dev, "short read in audio pipe!\n"); return; } if (go->audio_deliver != NULL) go->audio_deliver(go, urb->transfer_buffer, urb->actual_length); r = usb_submit_urb(urb, GFP_ATOMIC); if (r < 0) dev_err(go->dev, "error in audio pipe: %d\n", r); } static int go7007_usb_stream_start(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int i, r; for (i = 0; i < 8; ++i) { r = usb_submit_urb(usb->video_urbs[i], GFP_KERNEL); if (r < 0) { dev_err(go->dev, "error submitting video urb %d: %d\n", i, r); goto video_submit_failed; } } if (!go->audio_enabled) return 0; for (i = 0; i < 8; ++i) { r = usb_submit_urb(usb->audio_urbs[i], GFP_KERNEL); if (r < 0) { dev_err(go->dev, "error submitting audio urb %d: %d\n", i, r); goto audio_submit_failed; } } return 0; audio_submit_failed: for (i = 0; i < 7; ++i) usb_kill_urb(usb->audio_urbs[i]); video_submit_failed: for (i = 0; i < 8; ++i) usb_kill_urb(usb->video_urbs[i]); return -1; } static int go7007_usb_stream_stop(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; int i; if (go->status == STATUS_SHUTDOWN) return 0; for (i = 0; i < 8; ++i) usb_kill_urb(usb->video_urbs[i]); if (go->audio_enabled) for (i = 0; i < 8; ++i) usb_kill_urb(usb->audio_urbs[i]); return 0; } static int go7007_usb_send_firmware(struct go7007 *go, u8 *data, int len) { struct go7007_usb *usb = go->hpi_context; int transferred, pipe; int timeout = 500; pr_debug("DownloadBuffer sending %d bytes\n", len); if (usb->board->flags & GO7007_USB_EZUSB) pipe = usb_sndbulkpipe(usb->usbdev, 2); else pipe = usb_sndbulkpipe(usb->usbdev, 3); return usb_bulk_msg(usb->usbdev, pipe, data, len, &transferred, timeout); } static void go7007_usb_release(struct go7007 *go) { struct go7007_usb *usb = go->hpi_context; struct urb *vurb, *aurb; int i; if (usb->intr_urb) { usb_kill_urb(usb->intr_urb); kfree(usb->intr_urb->transfer_buffer); usb_free_urb(usb->intr_urb); } /* Free USB-related structs */ for (i = 0; i < 8; ++i) { vurb = usb->video_urbs[i]; if (vurb) { usb_kill_urb(vurb); kfree(vurb->transfer_buffer); usb_free_urb(vurb); } aurb = usb->audio_urbs[i]; if (aurb) { usb_kill_urb(aurb); kfree(aurb->transfer_buffer); usb_free_urb(aurb); } } kfree(go->hpi_context); } static const struct go7007_hpi_ops go7007_usb_ezusb_hpi_ops = { .interface_reset = go7007_usb_interface_reset, .write_interrupt = go7007_usb_ezusb_write_interrupt, .read_interrupt = go7007_usb_read_interrupt, .stream_start = go7007_usb_stream_start, .stream_stop = go7007_usb_stream_stop, .send_firmware = go7007_usb_send_firmware, .release = go7007_usb_release, }; static const struct go7007_hpi_ops go7007_usb_onboard_hpi_ops = { .interface_reset = go7007_usb_interface_reset, .write_interrupt = go7007_usb_onboard_write_interrupt, .read_interrupt = go7007_usb_read_interrupt, .stream_start = go7007_usb_stream_start, .stream_stop = go7007_usb_stream_stop, .send_firmware = go7007_usb_send_firmware, .release = go7007_usb_release, }; /********************* Driver for EZ-USB I2C adapter *********************/ static int go7007_usb_i2c_master_xfer(struct i2c_adapter *adapter, struct i2c_msg msgs[], int num) { struct go7007 *go = i2c_get_adapdata(adapter); struct go7007_usb *usb = go->hpi_context; u8 *buf = go->usb_buf; int buf_len, i; int ret = -EIO; if (go->status == STATUS_SHUTDOWN) return -ENODEV; mutex_lock(&usb->i2c_lock); for (i = 0; i < num; ++i) { /* The hardware command is "write some bytes then read some * bytes", so we try to coalesce a write followed by a read * into a single USB transaction */ if (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && (msgs[i + 1].flags & I2C_M_RD)) { #ifdef GO7007_I2C_DEBUG pr_debug("i2c write/read %d/%d bytes on %02x\n", msgs[i].len, msgs[i + 1].len, msgs[i].addr); #endif buf[0] = 0x01; buf[1] = msgs[i].len + 1; buf[2] = msgs[i].addr << 1; memcpy(&buf[3], msgs[i].buf, msgs[i].len); buf_len = msgs[i].len + 3; buf[buf_len++] = msgs[++i].len; } else if (msgs[i].flags & I2C_M_RD) { #ifdef GO7007_I2C_DEBUG pr_debug("i2c read %d bytes on %02x\n", msgs[i].len, msgs[i].addr); #endif buf[0] = 0x01; buf[1] = 1; buf[2] = msgs[i].addr << 1; buf[3] = msgs[i].len; buf_len = 4; } else { #ifdef GO7007_I2C_DEBUG pr_debug("i2c write %d bytes on %02x\n", msgs[i].len, msgs[i].addr); #endif buf[0] = 0x00; buf[1] = msgs[i].len + 1; buf[2] = msgs[i].addr << 1; memcpy(&buf[3], msgs[i].buf, msgs[i].len); buf_len = msgs[i].len + 3; buf[buf_len++] = 0; } if (go7007_usb_vendor_request(go, 0x24, 0, 0, buf, buf_len, 0) < 0) goto i2c_done; if (msgs[i].flags & I2C_M_RD) { memset(buf, 0, msgs[i].len + 1); if (go7007_usb_vendor_request(go, 0x25, 0, 0, buf, msgs[i].len + 1, 1) < 0) goto i2c_done; memcpy(msgs[i].buf, buf + 1, msgs[i].len); } } ret = num; i2c_done: mutex_unlock(&usb->i2c_lock); return ret; } static u32 go7007_usb_functionality(struct i2c_adapter *adapter) { /* No errors are reported by the hardware, so we don't bother * supporting quick writes to avoid confusing probing */ return (I2C_FUNC_SMBUS_EMUL) & ~I2C_FUNC_SMBUS_QUICK; } static const struct i2c_algorithm go7007_usb_algo = { .master_xfer = go7007_usb_i2c_master_xfer, .functionality = go7007_usb_functionality, }; static struct i2c_adapter go7007_usb_adap_templ = { .owner = THIS_MODULE, .name = "WIS GO7007SB EZ-USB", .algo = &go7007_usb_algo, }; /********************* USB add/remove functions *********************/ static int go7007_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct go7007 *go; struct go7007_usb *usb; const struct go7007_usb_board *board; struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_host_endpoint *ep; unsigned num_i2c_devs; char *name; int video_pipe, i, v_urb_len; pr_debug("probing new GO7007 USB board\n"); switch (id->driver_info) { case GO7007_BOARDID_MATRIX_II: name = "WIS Matrix II or compatible"; board = &board_matrix_ii; break; case GO7007_BOARDID_MATRIX_RELOAD: name = "WIS Matrix Reloaded or compatible"; board = &board_matrix_reload; break; case GO7007_BOARDID_MATRIX_REV: name = "WIS Matrix Revolution or compatible"; board = &board_matrix_revolution; break; case GO7007_BOARDID_STAR_TREK: name = "WIS Star Trek or compatible"; board = &board_star_trek; break; case GO7007_BOARDID_XMEN: name = "WIS XMen or compatible"; board = &board_xmen; break; case GO7007_BOARDID_XMEN_II: name = "WIS XMen II or compatible"; board = &board_xmen; break; case GO7007_BOARDID_XMEN_III: name = "WIS XMen III or compatible"; board = &board_xmen; break; case GO7007_BOARDID_PX_M402U: name = "Plextor PX-M402U"; board = &board_matrix_ii; break; case GO7007_BOARDID_PX_TV402U: name = "Plextor PX-TV402U (unknown tuner)"; board = &board_px_tv402u; break; case GO7007_BOARDID_LIFEVIEW_LR192: dev_err(&intf->dev, "The Lifeview TV Walker Ultra is not supported. Sorry!\n"); return -ENODEV; #if 0 name = "Lifeview TV Walker Ultra"; board = &board_lifeview_lr192; #endif break; case GO7007_BOARDID_SENSORAY_2250: dev_info(&intf->dev, "Sensoray 2250 found\n"); name = "Sensoray 2250/2251"; board = &board_sensoray_2250; break; case GO7007_BOARDID_ADS_USBAV_709: name = "ADS Tech DVD Xpress DX2"; board = &board_ads_usbav_709; break; default: dev_err(&intf->dev, "unknown board ID %d!\n", (unsigned int)id->driver_info); return -ENODEV; } go = go7007_alloc(&board->main_info, &intf->dev); if (go == NULL) return -ENOMEM; usb = kzalloc(sizeof(struct go7007_usb), GFP_KERNEL); if (usb == NULL) { kfree(go); return -ENOMEM; } usb->board = board; usb->usbdev = usbdev; usb_make_path(usbdev, go->bus_info, sizeof(go->bus_info)); go->board_id = id->driver_info; strscpy(go->name, name, sizeof(go->name)); if (board->flags & GO7007_USB_EZUSB) go->hpi_ops = &go7007_usb_ezusb_hpi_ops; else go->hpi_ops = &go7007_usb_onboard_hpi_ops; go->hpi_context = usb; ep = usb->usbdev->ep_in[4]; if (!ep) goto allocfail; /* Allocate the URB and buffer for receiving incoming interrupts */ usb->intr_urb = usb_alloc_urb(0, GFP_KERNEL); if (usb->intr_urb == NULL) goto allocfail; usb->intr_urb->transfer_buffer = kmalloc_array(2, sizeof(u16), GFP_KERNEL); if (usb->intr_urb->transfer_buffer == NULL) goto allocfail; if (usb_endpoint_type(&ep->desc) == USB_ENDPOINT_XFER_BULK) usb_fill_bulk_urb(usb->intr_urb, usb->usbdev, usb_rcvbulkpipe(usb->usbdev, 4), usb->intr_urb->transfer_buffer, 2*sizeof(u16), go7007_usb_readinterrupt_complete, go); else usb_fill_int_urb(usb->intr_urb, usb->usbdev, usb_rcvintpipe(usb->usbdev, 4), usb->intr_urb->transfer_buffer, 2*sizeof(u16), go7007_usb_readinterrupt_complete, go, 8); usb_set_intfdata(intf, &go->v4l2_dev); /* Boot the GO7007 */ if (go7007_boot_encoder(go, go->board_info->flags & GO7007_BOARD_USE_ONBOARD_I2C) < 0) goto allocfail; /* Register the EZ-USB I2C adapter, if we're using it */ if (board->flags & GO7007_USB_EZUSB_I2C) { memcpy(&go->i2c_adapter, &go7007_usb_adap_templ, sizeof(go7007_usb_adap_templ)); mutex_init(&usb->i2c_lock); go->i2c_adapter.dev.parent = go->dev; i2c_set_adapdata(&go->i2c_adapter, go); if (i2c_add_adapter(&go->i2c_adapter) < 0) { dev_err(go->dev, "error: i2c_add_adapter failed\n"); goto allocfail; } go->i2c_adapter_online = 1; } /* Pelco and Adlink reused the XMen and XMen-III vendor and product * IDs for their own incompatible designs. We can detect XMen boards * by probing the sensor, but there is no way to probe the sensors on * the Pelco and Adlink designs so we default to the Adlink. If it * is actually a Pelco, the user must set the assume_endura module * parameter. */ if ((go->board_id == GO7007_BOARDID_XMEN || go->board_id == GO7007_BOARDID_XMEN_III) && go->i2c_adapter_online) { union i2c_smbus_data data; /* Check to see if register 0x0A is 0x76 */ i2c_smbus_xfer(&go->i2c_adapter, 0x21, I2C_CLIENT_SCCB, I2C_SMBUS_READ, 0x0A, I2C_SMBUS_BYTE_DATA, &data); if (data.byte != 0x76) { if (assume_endura) { go->board_id = GO7007_BOARDID_ENDURA; usb->board = board = &board_endura; go->board_info = &board->main_info; strscpy(go->name, "Pelco Endura", sizeof(go->name)); } else { u16 channel; /* read channel number from GPIO[1:0] */ if (go7007_read_addr(go, 0x3c81, &channel)) goto allocfail; channel &= 0x3; go->board_id = GO7007_BOARDID_ADLINK_MPG24; usb->board = board = &board_adlink_mpg24; go->board_info = &board->main_info; go->channel_number = channel; snprintf(go->name, sizeof(go->name), "Adlink PCI-MPG24, channel #%d", channel); } go7007_update_board(go); } } num_i2c_devs = go->board_info->num_i2c_devs; /* Probe the tuner model on the TV402U */ if (go->board_id == GO7007_BOARDID_PX_TV402U) { /* Board strapping indicates tuner model */ if (go7007_usb_vendor_request(go, 0x41, 0, 0, go->usb_buf, 3, 1) < 0) { dev_err(go->dev, "GPIO read failed!\n"); goto allocfail; } switch (go->usb_buf[0] >> 6) { case 1: go->tuner_type = TUNER_SONY_BTF_PG472Z; go->std = V4L2_STD_PAL; strscpy(go->name, "Plextor PX-TV402U-EU", sizeof(go->name)); break; case 2: go->tuner_type = TUNER_SONY_BTF_PK467Z; go->std = V4L2_STD_NTSC_M_JP; num_i2c_devs -= 2; strscpy(go->name, "Plextor PX-TV402U-JP", sizeof(go->name)); break; case 3: go->tuner_type = TUNER_SONY_BTF_PB463Z; num_i2c_devs -= 2; strscpy(go->name, "Plextor PX-TV402U-NA", sizeof(go->name)); break; default: pr_debug("unable to detect tuner type!\n"); break; } /* Configure tuner mode selection inputs connected * to the EZ-USB GPIO output pins */ if (go7007_usb_vendor_request(go, 0x40, 0x7f02, 0, NULL, 0, 0) < 0) { dev_err(go->dev, "GPIO write failed!\n"); goto allocfail; } } /* Print a nasty message if the user attempts to use a USB2.0 device in * a USB1.1 port. There will be silent corruption of the stream. */ if ((board->flags & GO7007_USB_EZUSB) && usbdev->speed != USB_SPEED_HIGH) dev_err(go->dev, "*** WARNING *** This device must be connected to a USB 2.0 port! Attempting to capture video through a USB 1.1 port will result in stream corruption, even at low bitrates!\n"); /* Allocate the URBs and buffers for receiving the video stream */ if (board->flags & GO7007_USB_EZUSB) { if (!usb->usbdev->ep_in[6]) goto allocfail; v_urb_len = 1024; video_pipe = usb_rcvbulkpipe(usb->usbdev, 6); } else { if (!usb->usbdev->ep_in[1]) goto allocfail; v_urb_len = 512; video_pipe = usb_rcvbulkpipe(usb->usbdev, 1); } for (i = 0; i < 8; ++i) { usb->video_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (usb->video_urbs[i] == NULL) goto allocfail; usb->video_urbs[i]->transfer_buffer = kmalloc(v_urb_len, GFP_KERNEL); if (usb->video_urbs[i]->transfer_buffer == NULL) goto allocfail; usb_fill_bulk_urb(usb->video_urbs[i], usb->usbdev, video_pipe, usb->video_urbs[i]->transfer_buffer, v_urb_len, go7007_usb_read_video_pipe_complete, go); } /* Allocate the URBs and buffers for receiving the audio stream */ if ((board->flags & GO7007_USB_EZUSB) && (board->main_info.flags & GO7007_BOARD_HAS_AUDIO)) { if (!usb->usbdev->ep_in[8]) goto allocfail; for (i = 0; i < 8; ++i) { usb->audio_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (usb->audio_urbs[i] == NULL) goto allocfail; usb->audio_urbs[i]->transfer_buffer = kmalloc(4096, GFP_KERNEL); if (usb->audio_urbs[i]->transfer_buffer == NULL) goto allocfail; usb_fill_bulk_urb(usb->audio_urbs[i], usb->usbdev, usb_rcvbulkpipe(usb->usbdev, 8), usb->audio_urbs[i]->transfer_buffer, 4096, go7007_usb_read_audio_pipe_complete, go); } } /* Do any final GO7007 initialization, then register the * V4L2 and ALSA interfaces */ if (go7007_register_encoder(go, num_i2c_devs) < 0) goto allocfail; go->status = STATUS_ONLINE; return 0; allocfail: go7007_usb_release(go); kfree(go); return -ENOMEM; } static void go7007_usb_disconnect(struct usb_interface *intf) { struct go7007 *go = to_go7007(usb_get_intfdata(intf)); mutex_lock(&go->queue_lock); mutex_lock(&go->serialize_lock); if (go->audio_enabled) go7007_snd_remove(go); go->status = STATUS_SHUTDOWN; v4l2_device_disconnect(&go->v4l2_dev); video_unregister_device(&go->vdev); mutex_unlock(&go->serialize_lock); mutex_unlock(&go->queue_lock); v4l2_device_put(&go->v4l2_dev); } static struct usb_driver go7007_usb_driver = { .name = "go7007", .probe = go7007_usb_probe, .disconnect = go7007_usb_disconnect, .id_table = go7007_usb_id_table, }; module_usb_driver(go7007_usb_driver); MODULE_DESCRIPTION("WIS GO7007 USB support"); MODULE_LICENSE("GPL v2"); |
| 49 75 24 71 8 8 4 8 48 44 43 48 32 47 10 10 12 47 47 47 12 43 5 28 6 35 42 11 32 16 27 11 5 20 5 28 40 36 48 48 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/dir.c * * (C) 1992, 1993, 1994 Eric Youngdale Modified for ISO 9660 filesystem. * * (C) 1991 Linus Torvalds - minix filesystem * * Steve Beynon : Missing last directory entries fixed * (stephen@askone.demon.co.uk) : 21st June 1996 * * isofs directory handling functions */ #include <linux/gfp.h> #include "isofs.h" int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) { char * old = de->name; int len = de->name_len[0]; int i; for (i = 0; i < len; i++) { unsigned char c = old[i]; if (!c) break; if (c >= 'A' && c <= 'Z') c |= 0x20; /* lower case */ /* Drop trailing '.;1' (ISO 9660:1988 7.5.1 requires period) */ if (c == '.' && i == len - 3 && old[i + 1] == ';' && old[i + 2] == '1') break; /* Drop trailing ';1' */ if (c == ';' && i == len - 2 && old[i + 1] == '1') break; /* Convert remaining ';' to '.' */ /* Also '/' to '.' (broken Acorn-generated ISO9660 images) */ if (c == ';' || c == '/') c = '.'; new[i] = c; } return i; } /* Acorn extensions written by Matthew Wilcox <willy@infradead.org> 1998 */ int get_acorn_filename(struct iso_directory_record *de, char *retname, struct inode *inode) { int std; unsigned char *chr; int retnamlen = isofs_name_translate(de, retname, inode); if (retnamlen == 0) return 0; std = sizeof(struct iso_directory_record) + de->name_len[0]; if (std & 1) std++; if (de->length[0] - std != 32) return retnamlen; chr = ((unsigned char *) de) + std; if (strncmp(chr, "ARCHIMEDES", 10)) return retnamlen; if ((*retname == '_') && ((chr[19] & 1) == 1)) *retname = '!'; if (((de->flags[0] & 2) == 0) && (chr[13] == 0xff) && ((chr[12] & 0xf0) == 0xf0)) { retname[retnamlen] = ','; sprintf(retname+retnamlen+1, "%3.3x", ((chr[12] & 0xf) << 8) | chr[11]); retnamlen += 4; } return retnamlen; } /* * This should _really_ be cleaned up some day.. */ static int do_isofs_readdir(struct inode *inode, struct file *file, struct dir_context *ctx, char *tmpname, struct iso_directory_record *tmpde) { unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); unsigned char bufbits = ISOFS_BUFFER_BITS(inode); unsigned long block, offset, block_saved, offset_saved; unsigned long inode_number = 0; /* Quiet GCC */ struct buffer_head *bh = NULL; int len; int map; int first_de = 1; char *p = NULL; /* Quiet GCC */ struct iso_directory_record *de; struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); offset = ctx->pos & (bufsize - 1); block = ctx->pos >> bufbits; while (ctx->pos < inode->i_size) { int de_len; if (!bh) { bh = isofs_bread(inode, block); if (!bh) return 0; } de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *)de; /* * If the length byte is zero, we should move on to the next * CDROM sector. If we are at the end of the directory, we * kick out of the while loop. */ if (de_len == 0) { brelse(bh); bh = NULL; ctx->pos = (ctx->pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); block = ctx->pos >> bufbits; offset = 0; continue; } block_saved = block; offset_saved = offset; offset += de_len; /* Make sure we have a full directory entry */ if (offset >= bufsize) { int slop = bufsize - offset + de_len; memcpy(tmpde, de, slop); offset &= bufsize - 1; block++; brelse(bh); bh = NULL; if (offset) { bh = isofs_bread(inode, block); if (!bh) return 0; memcpy((void *) tmpde + slop, bh->b_data, offset); } de = tmpde; } /* Basic sanity check, whether name doesn't exceed dir entry */ if (de_len < sizeof(struct iso_directory_record) || de_len < de->name_len[0] + sizeof(struct iso_directory_record)) { printk(KERN_NOTICE "iso9660: Corrupted directory entry" " in block %lu of inode %lu\n", block, inode->i_ino); brelse(bh); return -EIO; } if (first_de) { isofs_normalize_block_and_offset(de, &block_saved, &offset_saved); inode_number = isofs_get_ino(block_saved, offset_saved, bufbits); } if (de->flags[-sbi->s_high_sierra] & 0x80) { first_de = 0; ctx->pos += de_len; continue; } first_de = 1; /* Handle the case of the '.' directory */ if (de->name_len[0] == 1 && de->name[0] == 0) { if (!dir_emit_dot(file, ctx)) break; ctx->pos += de_len; continue; } len = 0; /* Handle the case of the '..' directory */ if (de->name_len[0] == 1 && de->name[0] == 1) { if (!dir_emit_dotdot(file, ctx)) break; ctx->pos += de_len; continue; } /* Handle everything else. Do name translation if there is no Rock Ridge NM field. */ /* * Do not report hidden files if so instructed, or associated * files unless instructed to do so */ if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || (!sbi->s_showassoc && (de->flags[-sbi->s_high_sierra] & 4))) { ctx->pos += de_len; continue; } map = 1; if (sbi->s_rock) { len = get_rock_ridge_filename(de, tmpname, inode); if (len != 0) { /* may be -1 */ p = tmpname; map = 0; } } if (map) { #ifdef CONFIG_JOLIET if (sbi->s_joliet_level) { len = get_joliet_filename(de, tmpname, inode); p = tmpname; } else #endif if (sbi->s_mapping == 'a') { len = get_acorn_filename(de, tmpname, inode); p = tmpname; } else if (sbi->s_mapping == 'n') { len = isofs_name_translate(de, tmpname, inode); p = tmpname; } else { p = de->name; len = de->name_len[0]; } } if (len > 0) { if (!dir_emit(ctx, p, len, inode_number, DT_UNKNOWN)) break; } ctx->pos += de_len; } if (bh) brelse(bh); return 0; } /* * Handle allocation of temporary space for name translation and * handling split directory entries.. The real work is done by * "do_isofs_readdir()". */ static int isofs_readdir(struct file *file, struct dir_context *ctx) { int result; char *tmpname; struct iso_directory_record *tmpde; struct inode *inode = file_inode(file); tmpname = (char *)__get_free_page(GFP_KERNEL); if (tmpname == NULL) return -ENOMEM; tmpde = (struct iso_directory_record *) (tmpname+1024); result = do_isofs_readdir(inode, file, ctx, tmpname, tmpde); free_page((unsigned long) tmpname); return result; } const struct file_operations isofs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = isofs_readdir, }; /* * directories can handle most operations... */ const struct inode_operations isofs_dir_inode_operations = { .lookup = isofs_lookup, }; |
| 827 827 826 826 829 828 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 | // SPDX-License-Identifier: GPL-2.0-only /* * Common framework for low-level network console, dump, and debugger code * * Sep 8 2003 Matt Mackall <mpm@selenic.com> * * based on the netconsole code from: * * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> * Copyright (C) 2002 Red Hat, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> #include <net/addrconf.h> #include <net/ndisc.h> #include <net/ip6_checksum.h> #include <linux/unaligned.h> #include <trace/events/napi.h> #include <linux/kconfig.h> /* * We maintain a small pool of fully-sized skbs, to make sure the * message gets out even in extreme OOM situations. */ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define USEC_PER_POLL 50 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ sizeof(struct iphdr) + \ sizeof(struct udphdr) + \ MAX_UDP_CHUNK) static void zap_completion_queue(void); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { netdev_tx_t status = NETDEV_TX_OK; netdev_features_t features; features = netif_skb_features(skb); if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { /* This is actually a packet drop, but we * don't want the code that calls this * function to try and operate on a NULL skb. */ goto out; } } status = netdev_start_xmit(skb, dev, txq, false); out: return status; } static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; struct netdev_queue *txq; unsigned int q_index; if (!netif_device_present(dev) || !netif_running(dev)) { kfree_skb(skb); continue; } local_irq_save(flags); /* check if skb->queue_mapping is still valid */ q_index = skb_get_queue_mapping(skb); if (unlikely(q_index >= dev->real_num_tx_queues)) { q_index = q_index % dev->real_num_tx_queues; skb_set_queue_mapping(skb, q_index); } txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } static int netif_local_xmit_active(struct net_device *dev) { int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) return 1; } return 0; } static void poll_one_napi(struct napi_struct *napi) { int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) return; /* We explicitly pass the polling call a budget of 0 to * indicate that we are clearing the Tx path only. */ work = napi->poll(napi, 0); WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll); trace_napi_poll(napi, work, 0); clear_bit(NAPI_STATE_NPSVC, &napi->state); } static void poll_napi(struct net_device *dev) { struct napi_struct *napi; int cpu = smp_processor_id(); list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) { poll_one_napi(napi); smp_store_release(&napi->poll_owner, -1); } } } void netpoll_poll_dev(struct net_device *dev) { struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ if (!ni || down_trylock(&ni->dev_lock)) return; /* Some drivers will take the same locks in poll and xmit, * we can't poll if local CPU is already in xmit. */ if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } ops = dev->netdev_ops; if (ops->ndo_poll_controller) ops->ndo_poll_controller(dev); poll_napi(dev); up(&ni->dev_lock); zap_completion_queue(); } EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; might_sleep(); ni = rtnl_dereference(dev->npinfo); if (ni) down(&ni->dev_lock); } void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; ni = rtnl_dereference(dev->npinfo); if (ni) up(&ni->dev_lock); } static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; unsigned long flags; skb_pool = &np->skb_pool; spin_lock_irqsave(&skb_pool->lock, flags); while (skb_pool->qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; __skb_queue_tail(skb_pool, skb); } spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) { unsigned long flags; struct softnet_data *sd = &get_cpu_var(softnet_data); if (sd->completion_queue) { struct sk_buff *clist; local_irq_save(flags); clist = sd->completion_queue; sd->completion_queue = NULL; local_irq_restore(flags); while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { refcount_set(&skb->users, 1); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); } } } put_cpu_var(softnet_data); } static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; zap_completion_queue(); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { skb = skb_dequeue(&np->skb_pool); schedule_work(&np->refill_wq); } if (!skb) { if (++count < 10) { netpoll_poll_dev(np->dev); goto repeat; } return NULL; } refcount_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; } static int netpoll_owner_active(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (READ_ONCE(napi->poll_owner) == smp_processor_id()) return 1; } return 0; } /* call with IRQ disabled */ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; lockdep_assert_irqs_disabled(); dev = np->dev; rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); goto out; } /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; txq = netdev_core_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (HARD_TX_TRYLOCK(dev, txq)) { if (!netif_xmit_stopped(txq)) status = netpoll_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); if (dev_xmit_complete(status)) break; } /* tickle device maybe there is some cleanup */ netpoll_poll_dev(np->dev); udelay(USEC_PER_POLL); } WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n", dev->name, dev->netdev_ops->ndo_start_xmit); } if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } ret = NETDEV_TX_OK; out: rcu_read_unlock(); return ret; } static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb, int len) { struct udphdr *udph; int udp_len; udp_len = len + sizeof(struct udphdr); udph = udp_hdr(skb); /* check needs to be set, since it will be consumed in csum_partial */ udph->check = 0; if (np->ipv6) udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); else udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; netdev_tx_t ret; if (unlikely(!np)) { dev_kfree_skb_irq(skb); ret = NET_XMIT_DROP; } else { local_irq_save(flags); ret = __netpoll_send_skb(np, skb); local_irq_restore(flags); } return ret; } EXPORT_SYMBOL(netpoll_send_skb); static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len) { struct ipv6hdr *ip6h; skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); /* ip6h->version = 6; ip6h->priority = 0; */ *(unsigned char *)ip6h = 0x60; ip6h->flow_lbl[0] = 0; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->payload_len = htons(sizeof(struct udphdr) + len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = 32; ip6h->saddr = np->local_ip.in6; ip6h->daddr = np->remote_ip.in6; skb->protocol = htons(ETH_P_IPV6); } static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len) { static atomic_t ip_ident; struct iphdr *iph; int ip_len; ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); /* iph->version = 4; iph->ihl = 5; */ *(unsigned char *)iph = 0x45; iph->tos = 0; put_unaligned(htons(ip_len), &iph->tot_len); iph->id = htons(atomic_inc_return(&ip_ident)); iph->frag_off = 0; iph->ttl = 64; iph->protocol = IPPROTO_UDP; iph->check = 0; put_unaligned(np->local_ip.ip, &iph->saddr); put_unaligned(np->remote_ip.ip, &iph->daddr); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); skb->protocol = htons(ETH_P_IP); } static void push_udp(struct netpoll *np, struct sk_buff *skb, int len) { struct udphdr *udph; int udp_len; udp_len = len + sizeof(struct udphdr); skb_push(skb, sizeof(struct udphdr)); skb_reset_transport_header(skb); udph = udp_hdr(skb); udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); netpoll_udp_checksum(np, skb, len); } static void push_eth(struct netpoll *np, struct sk_buff *skb) { struct ethhdr *eth; eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); ether_addr_copy(eth->h_source, np->dev->dev_addr); ether_addr_copy(eth->h_dest, np->remote_mac); if (np->ipv6) eth->h_proto = htons(ETH_P_IPV6); else eth->h_proto = htons(ETH_P_IP); } int netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!irqs_disabled()); udp_len = len + sizeof(struct udphdr); if (np->ipv6) ip_len = udp_len + sizeof(struct ipv6hdr); else ip_len = udp_len + sizeof(struct iphdr); total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, total_len - len); if (!skb) return -ENOMEM; skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); push_udp(np, skb, len); if (np->ipv6) push_ipv6(np, skb, len); else push_ipv4(np, skb, len); push_eth(np, skb); skb->dev = np->dev; return (int)netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); static void skb_pool_flush(struct netpoll *np) { struct sk_buff_head *skb_pool; cancel_work_sync(&np->refill_wq); skb_pool = &np->skb_pool; skb_queue_purge_reason(skb_pool, SKB_CONSUMED); } static void refill_skbs_work_handler(struct work_struct *work) { struct netpoll *np = container_of(work, struct netpoll, refill_wq); refill_skbs(np); } int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; int err; skb_queue_head_init(&np->skb_pool); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", ndev->name); err = -ENOTSUPP; goto out; } npinfo = rtnl_dereference(ndev->npinfo); if (!npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); refcount_set(&npinfo->refcnt, 1); ops = ndev->netdev_ops; if (ops->ndo_netpoll_setup) { err = ops->ndo_netpoll_setup(ndev); if (err) goto free_npinfo; } } else { refcount_inc(&npinfo->refcnt); } np->dev = ndev; strscpy(np->dev_name, ndev->name, IFNAMSIZ); /* fill up the skb queue */ refill_skbs(np); INIT_WORK(&np->refill_wq, refill_skbs_work_handler); /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); return 0; free_npinfo: kfree(npinfo); out: return err; } EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. */ static char *egress_dev(struct netpoll *np, char *buf) { if (np->dev_name[0]) return np->dev_name; snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); return buf; } static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev, unsigned int timeout) { unsigned long atmost; atmost = jiffies + timeout * HZ; while (!netif_carrier_ok(ndev)) { if (time_after(jiffies, atmost)) { np_notice(np, "timeout waiting for carrier\n"); break; } msleep(1); } } /* * Take the IPv6 from ndev and populate local_ip structure in netpoll */ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) { char buf[MAC_ADDR_STR_LEN + 1]; int err = -EDESTADDRREQ; struct inet6_dev *idev; if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", egress_dev(np, buf)); return -EINVAL; } idev = __in6_dev_get(ndev); if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) continue; /* Got the IP, let's return */ np->local_ip.in6 = ifp->addr; err = 0; break; } read_unlock_bh(&idev->lock); } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", egress_dev(np, buf)); return err; } np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); return 0; } /* * Take the IPv4 from ndev and populate local_ip structure in netpoll */ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) { char buf[MAC_ADDR_STR_LEN + 1]; const struct in_ifaddr *ifa; struct in_device *in_dev; in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", egress_dev(np, buf)); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", egress_dev(np, buf)); return -EDESTADDRREQ; } np->local_ip.ip = ifa->ifa_local; np_info(np, "local IP %pI4\n", &np->local_ip.ip); return 0; } int netpoll_setup(struct netpoll *np) { struct net *net = current->nsproxy->net_ns; char buf[MAC_ADDR_STR_LEN + 1]; struct net_device *ndev = NULL; bool ip_overwritten = false; int err; rtnl_lock(); if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); else if (is_valid_ether_addr(np->dev_mac)) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); err = -ENODEV; goto unlock; } netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL); if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", egress_dev(np, buf)); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", egress_dev(np, buf)); err = dev_open(ndev, NULL); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } rtnl_unlock(); netpoll_wait_carrier(np, ndev, carrier_timeout); rtnl_lock(); } if (!np->local_ip.ip) { if (!np->ipv6) { err = netpoll_take_ipv4(np, ndev); if (err) goto put; } else { err = netpoll_take_ipv6(np, ndev); if (err) goto put; } ip_overwritten = true; } err = __netpoll_setup(np, ndev); if (err) goto flush; rtnl_unlock(); /* Make sure all NAPI polls which started before dev->npinfo * was visible have exited before we start calling NAPI poll. * NAPI skips locking if dev->npinfo is NULL. */ synchronize_rcu(); return 0; flush: skb_pool_flush(np); put: DEBUG_NET_WARN_ON_ONCE(np->dev); if (ip_overwritten) memset(&np->local_ip, 0, sizeof(np->local_ip)); netdev_put(ndev, &np->dev_tracker); unlock: rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) { struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ cancel_delayed_work(&npinfo->tx_work); /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); /* now cancel it again */ cancel_delayed_work(&npinfo->tx_work); kfree(npinfo); } static void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; ops = np->dev->netdev_ops; if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); skb_pool_flush(np); } void __netpoll_free(struct netpoll *np) { ASSERT_RTNL(); /* Wait for transmitting packets to finish before freeing. */ synchronize_net(); __netpoll_cleanup(np); kfree(np); } EXPORT_SYMBOL_GPL(__netpoll_free); void do_netpoll_cleanup(struct netpoll *np) { __netpoll_cleanup(np); netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; } EXPORT_SYMBOL(do_netpoll_cleanup); void netpoll_cleanup(struct netpoll *np) { rtnl_lock(); if (!np->dev) goto out; do_netpoll_cleanup(np); out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); |
| 1136 1136 76 1064 1006 76 933 1006 1138 856 10 875 350 311 311 310 311 121 1 2 311 127 121 121 311 311 310 310 311 311 311 311 8 48 3 32 12 120 142 269 44 190 121 311 311 311 311 311 310 754 755 172 583 754 604 628 854 767 86 1 85 66 14 5 5 5 3 11 833 1158 3 1162 165 165 165 165 178 2 2 2 2 1 165 2 1 165 4 2 168 2 166 1173 1174 12 3 36 1 833 820 13 12 19 2 583 565 1 1 1 1 1 10 1 11 452 9 448 590 1 2 2 1 584 581 3 583 402 31 44 68 35 180 401 32 551 5 522 57 2 326 133 414 457 7 449 468 2 486 484 2 342 340 2 497 2 1 1 202 117 179 1 486 1 1 5 489 2 1 395 89 2 90 90 99 381 311 172 5 7 332 100 246 343 71 2 2 75 3 2 69 2 60 1 5 50 18 3 4 1 1 1 1 1 1 2 3 5 5 1 3 1 2 157 2 2 3 146 1 2 147 1 12 2 2 8 7 20 3 2 1 1 12 2 2 1 2 2 2 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Facebook */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/rcupdate_trace.h> #include <linux/sched/signal.h> #include <net/bpf_sk_storage.h> #include <net/hotdata.h> #include <net/sock.h> #include <net/tcp.h> #include <net/net_namespace.h> #include <net/page_pool/helpers.h> #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> #include <linux/netfilter.h> #include <net/netdev_rx_queue.h> #include <net/xdp.h> #include <net/netfilter/nf_bpf_link.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> struct bpf_test_timer { enum { NO_PREEMPT, NO_MIGRATE } mode; u32 i; u64 time_start, time_spent; }; static void bpf_test_timer_enter(struct bpf_test_timer *t) __acquires(rcu) { rcu_read_lock(); if (t->mode == NO_PREEMPT) preempt_disable(); else migrate_disable(); t->time_start = ktime_get_ns(); } static void bpf_test_timer_leave(struct bpf_test_timer *t) __releases(rcu) { t->time_start = 0; if (t->mode == NO_PREEMPT) preempt_enable(); else migrate_enable(); rcu_read_unlock(); } static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, u32 repeat, int *err, u32 *duration) __must_hold(rcu) { t->i += iterations; if (t->i >= repeat) { /* We're done. */ t->time_spent += ktime_get_ns() - t->time_start; do_div(t->time_spent, t->i); *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent; *err = 0; goto reset; } if (signal_pending(current)) { /* During iteration: we've been cancelled, abort. */ *err = -EINTR; goto reset; } if (need_resched()) { /* During iteration: we need to reschedule between runs. */ t->time_spent += ktime_get_ns() - t->time_start; bpf_test_timer_leave(t); cond_resched(); bpf_test_timer_enter(t); } /* Do another round. */ return true; reset: t->i = 0; return false; } /* We put this struct at the head of each page with a context and frame * initialised when the page is allocated, so we don't have to do this on each * repetition of the test run. */ struct xdp_page_head { struct xdp_buff orig_ctx; struct xdp_buff ctx; union { /* ::data_hard_start starts here */ DECLARE_FLEX_ARRAY(struct xdp_frame, frame); DECLARE_FLEX_ARRAY(u8, data); }; }; struct xdp_test_data { struct xdp_buff *orig_ctx; struct xdp_rxq_info rxq; struct net_device *dev; struct page_pool *pp; struct xdp_frame **frames; struct sk_buff **skbs; struct xdp_mem_info mem; u32 batch_size; u32 frame_cnt; }; /* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE * must be updated accordingly this gets changed, otherwise BPF selftests * will fail. */ #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 static void xdp_test_run_init_page(netmem_ref netmem, void *arg) { struct xdp_page_head *head = phys_to_virt(page_to_phys(netmem_to_page(netmem))); struct xdp_buff *new_ctx, *orig_ctx; u32 headroom = XDP_PACKET_HEADROOM; struct xdp_test_data *xdp = arg; size_t frm_len, meta_len; struct xdp_frame *frm; void *data; orig_ctx = xdp->orig_ctx; frm_len = orig_ctx->data_end - orig_ctx->data_meta; meta_len = orig_ctx->data - orig_ctx->data_meta; headroom -= meta_len; new_ctx = &head->ctx; frm = head->frame; data = head->data; memcpy(data + headroom, orig_ctx->data_meta, frm_len); xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); xdp_prepare_buff(new_ctx, data, headroom, frm_len, true); new_ctx->data = new_ctx->data_meta + meta_len; xdp_update_frame_from_buff(new_ctx, frm); frm->mem_type = new_ctx->rxq->mem.type; memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx)); } static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) { struct page_pool *pp; int err = -ENOMEM; struct page_pool_params pp_params = { .order = 0, .flags = 0, .pool_size = xdp->batch_size, .nid = NUMA_NO_NODE, .init_callback = xdp_test_run_init_page, .init_arg = xdp, }; xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); if (!xdp->frames) return -ENOMEM; xdp->skbs = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); if (!xdp->skbs) goto err_skbs; pp = page_pool_create(&pp_params); if (IS_ERR(pp)) { err = PTR_ERR(pp); goto err_pp; } /* will copy 'mem.id' into pp->xdp_mem_id */ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); if (err) goto err_mmodel; xdp->pp = pp; /* We create a 'fake' RXQ referencing the original dev, but with an * xdp_mem_info pointing to our page_pool */ xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0); xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; xdp->rxq.mem.id = pp->xdp_mem_id; xdp->dev = orig_ctx->rxq->dev; xdp->orig_ctx = orig_ctx; return 0; err_mmodel: page_pool_destroy(pp); err_pp: kvfree(xdp->skbs); err_skbs: kvfree(xdp->frames); return err; } static void xdp_test_run_teardown(struct xdp_test_data *xdp) { xdp_unreg_mem_model(&xdp->mem); page_pool_destroy(xdp->pp); kfree(xdp->frames); kfree(xdp->skbs); } static bool frame_was_changed(const struct xdp_page_head *head) { /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, * i.e. has the highest chances to be overwritten. If those two are * untouched, it's most likely safe to skip the context reset. */ return head->frame->data != head->orig_ctx.data || head->frame->flags != head->orig_ctx.flags; } static bool ctx_was_changed(struct xdp_page_head *head) { return head->orig_ctx.data != head->ctx.data || head->orig_ctx.data_meta != head->ctx.data_meta || head->orig_ctx.data_end != head->ctx.data_end; } static void reset_ctx(struct xdp_page_head *head) { if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) return; head->ctx.data = head->orig_ctx.data; head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; xdp_update_frame_from_buff(&head->ctx, head->frame); head->frame->mem_type = head->orig_ctx.rxq->mem.type; } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, struct sk_buff **skbs, struct net_device *dev) { gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; int i, n; LIST_HEAD(list); n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes, (void **)skbs); if (unlikely(n == 0)) { for (i = 0; i < nframes; i++) xdp_return_frame(frames[i]); return -ENOMEM; } for (i = 0; i < nframes; i++) { struct xdp_frame *xdpf = frames[i]; struct sk_buff *skb = skbs[i]; skb = __xdp_build_skb_from_frame(xdpf, skb, dev); if (!skb) { xdp_return_frame(xdpf); continue; } list_add_tail(&skb->list, &list); } netif_receive_skb_list(&list); return 0; } static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, u32 repeat) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int err = 0, act, ret, i, nframes = 0, batch_sz; struct xdp_frame **frames = xdp->frames; struct bpf_redirect_info *ri; struct xdp_page_head *head; struct xdp_frame *frm; bool redirect = false; struct xdp_buff *ctx; struct page *page; batch_sz = min_t(u32, repeat, xdp->batch_size); local_bh_disable(); bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); ri = bpf_net_ctx_get_ri(); xdp_set_return_frame_no_direct(); for (i = 0; i < batch_sz; i++) { page = page_pool_dev_alloc_pages(xdp->pp); if (!page) { err = -ENOMEM; goto out; } head = phys_to_virt(page_to_phys(page)); reset_ctx(head); ctx = &head->ctx; frm = head->frame; xdp->frame_cnt++; act = bpf_prog_run_xdp(prog, ctx); /* if program changed pkt bounds we need to update the xdp_frame */ if (unlikely(ctx_was_changed(head))) { ret = xdp_update_frame_from_buff(ctx, frm); if (ret) { xdp_return_buff(ctx); continue; } } switch (act) { case XDP_TX: /* we can't do a real XDP_TX since we're not in the * driver, so turn it into a REDIRECT back to the same * index */ ri->tgt_index = xdp->dev->ifindex; ri->map_id = INT_MAX; ri->map_type = BPF_MAP_TYPE_UNSPEC; fallthrough; case XDP_REDIRECT: redirect = true; ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog); if (ret) xdp_return_buff(ctx); break; case XDP_PASS: frames[nframes++] = frm; break; default: bpf_warn_invalid_xdp_action(NULL, prog, act); fallthrough; case XDP_DROP: xdp_return_buff(ctx); break; } } out: if (redirect) xdp_do_flush(); if (nframes) { ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev); if (ret) err = ret; } xdp_clear_return_frame_no_direct(); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); return err; } static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, u32 repeat, u32 batch_size, u32 *time) { struct xdp_test_data xdp = { .batch_size = batch_size }; struct bpf_test_timer t = { .mode = NO_MIGRATE }; int ret; if (!repeat) repeat = 1; ret = xdp_test_run_setup(&xdp, ctx); if (ret) return ret; bpf_test_timer_enter(&t); do { xdp.frame_cnt = 0; ret = xdp_test_run_batch(&xdp, prog, repeat - t.i); if (unlikely(ret < 0)) break; } while (bpf_test_timer_continue(&t, xdp.frame_cnt, repeat, &ret, time)); bpf_test_timer_leave(&t); xdp_test_run_teardown(&xdp); return ret; } static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; struct bpf_test_timer t = { NO_MIGRATE }; enum bpf_cgroup_storage_type stype; int ret; for_each_cgroup_storage_type(stype) { item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); if (IS_ERR(item.cgroup_storage[stype])) { item.cgroup_storage[stype] = NULL; for_each_cgroup_storage_type(stype) bpf_cgroup_storage_free(item.cgroup_storage[stype]); return -ENOMEM; } } if (!repeat) repeat = 1; bpf_test_timer_enter(&t); old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { run_ctx.prog_item = &item; local_bh_disable(); bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = bpf_prog_run(prog, ctx); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); for_each_cgroup_storage_type(stype) bpf_cgroup_storage_free(item.cgroup_storage[stype]); return ret; } static int bpf_test_finish(const union bpf_attr *kattr, union bpf_attr __user *uattr, const void *data, struct skb_shared_info *sinfo, u32 size, u32 retval, u32 duration) { void __user *data_out = u64_to_user_ptr(kattr->test.data_out); int err = -EFAULT; u32 copy_size = size; /* Clamp copy if the user has provided a size hint, but copy the full * buffer if not to retain old behaviour. */ if (kattr->test.data_size_out && copy_size > kattr->test.data_size_out) { copy_size = kattr->test.data_size_out; err = -ENOSPC; } if (data_out) { int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size; if (len < 0) { err = -ENOSPC; goto out; } if (copy_to_user(data_out, data, len)) goto out; if (sinfo) { int i, offset = len; u32 data_len; for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; if (offset >= copy_size) { err = -ENOSPC; break; } data_len = min_t(u32, copy_size - offset, skb_frag_size(frag)); if (copy_to_user(data_out + offset, skb_frag_address(frag), data_len)) goto out; offset += data_len; } } } if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) goto out; if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) goto out; if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) goto out; if (err != -ENOSPC) err = 0; out: trace_bpf_test_finish(&err); return err; } /* Integer types of various sizes and pointer combinations cover variety of * architecture dependent calling conventions. 7+ can be supported in the * future. */ __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; } EXPORT_SYMBOL_GPL(bpf_fentry_test1); noinline int bpf_fentry_test2(int a, u64 b) { return a + b; } noinline int bpf_fentry_test3(char a, int b, u64 c) { return a + b + c; } noinline int bpf_fentry_test4(void *a, char b, int c, u64 d) { return (long)a + b + c + d; } noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) { return a + (long)b + c + d + e; } noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) { return a + (long)b + c + d + (long)e + f; } struct bpf_fentry_test_t { struct bpf_fentry_test_t *a; }; noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg) { asm volatile ("" : "+r"(arg)); return (long)arg; } noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg) { return (long)arg->a; } __bpf_kfunc u32 bpf_fentry_test9(u32 *a) { return *a; } noinline int bpf_fentry_test10(const void *a) { return (long)a; } noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo) { } __bpf_kfunc int bpf_modify_return_test(int a, int *b) { *b += 1; return a + *b; } __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d, void *e, char f, int g) { *b += 1; return a + *b + c + d + (long)e + f + g; } __bpf_kfunc int bpf_modify_return_test_tp(int nonce) { trace_bpf_trigger_tp(nonce); return nonce; } noinline int bpf_fentry_shadow_test(int a) { return a + 1; } struct prog_test_member1 { int a; }; struct prog_test_member { struct prog_test_member1 m; int c; }; struct prog_test_ref_kfunc { int a; int b; struct prog_test_member memb; struct prog_test_ref_kfunc *next; refcount_t cnt; }; __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { refcount_dec(&p->cnt); } __bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p) { bpf_kfunc_call_test_release(p); } CFI_NOSEAL(bpf_kfunc_call_test_release_dtor); __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } __bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p) { } CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) BTF_ID_FLAGS(func, bpf_modify_return_test_tp) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) BTF_KFUNCS_END(bpf_test_modify_return_ids) static const struct btf_kfunc_id_set bpf_test_modify_return_set = { .owner = THIS_MODULE, .set = &bpf_test_modify_return_ids, }; BTF_KFUNCS_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) BTF_KFUNCS_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void *data; if (user_size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); if (copy_from_user(data + headroom, data_in, user_size)) { kfree(data); return ERR_PTR(-EFAULT); } return data; } int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { struct bpf_fentry_test_t arg = {}; u16 side_effect = 0, ret = 0; int b = 2, err = -EFAULT; u32 retval = 0; if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || bpf_fentry_test8(&arg) != 0 || bpf_fentry_test9(&retval) != 0 || bpf_fentry_test10((void *)0) != 0) goto out; break; case BPF_MODIFY_RETURN: ret = bpf_modify_return_test(1, &b); if (b != 2) side_effect++; b = 2; ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7); if (b != 2) side_effect++; break; default: goto out; } retval = ((u32)side_effect << 16) | ret; if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) goto out; err = 0; out: trace_bpf_test_finish(&err); return err; } struct bpf_raw_tp_test_run_info { struct bpf_prog *prog; void *ctx; u32 retval; }; static void __bpf_prog_test_run_raw_tp(void *data) { struct bpf_raw_tp_test_run_info *info = data; struct bpf_trace_run_ctx run_ctx = {}; struct bpf_run_ctx *old_run_ctx; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); rcu_read_lock(); info->retval = bpf_prog_run(info->prog, info->ctx); rcu_read_unlock(); bpf_reset_run_ctx(old_run_ctx); } int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); __u32 ctx_size_in = kattr->test.ctx_size_in; struct bpf_raw_tp_test_run_info info; int cpu = kattr->test.cpu, err = 0; int current_cpu; /* doesn't support data_in/out, ctx_out, duration, or repeat */ if (kattr->test.data_in || kattr->test.data_out || kattr->test.ctx_out || kattr->test.duration || kattr->test.repeat || kattr->test.batch_size) return -EINVAL; if (ctx_size_in < prog->aux->max_ctx_offset || ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64)) return -EINVAL; if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) return -EINVAL; if (ctx_size_in) { info.ctx = memdup_user(ctx_in, ctx_size_in); if (IS_ERR(info.ctx)) return PTR_ERR(info.ctx); } else { info.ctx = NULL; } info.prog = prog; current_cpu = get_cpu(); if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 || cpu == current_cpu) { __bpf_prog_test_run_raw_tp(&info); } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { /* smp_call_function_single() also checks cpu_online() * after csd_lock(). However, since cpu is from user * space, let's do an extra quick check to filter out * invalid value before smp_call_function_single(). */ err = -ENXIO; } else { err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp, &info, 1); } put_cpu(); if (!err && copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) err = -EFAULT; kfree(info.ctx); return err; } static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); u32 size = kattr->test.ctx_size_in; void *data; int err; if (!data_in && !data_out) return NULL; data = kzalloc(max_size, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); if (data_in) { err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size); if (err) { kfree(data); return ERR_PTR(err); } size = min_t(u32, max_size, size); if (copy_from_user(data, data_in, size)) { kfree(data); return ERR_PTR(-EFAULT); } } return data; } static int bpf_ctx_finish(const union bpf_attr *kattr, union bpf_attr __user *uattr, const void *data, u32 size) { void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); int err = -EFAULT; u32 copy_size = size; if (!data || !data_out) return 0; if (copy_size > kattr->test.ctx_size_out) { copy_size = kattr->test.ctx_size_out; err = -ENOSPC; } if (copy_to_user(data_out, data, copy_size)) goto out; if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) goto out; if (err != -ENOSPC) err = 0; out: return err; } /** * range_is_zero - test whether buffer is initialized * @buf: buffer to check * @from: check from this position * @to: check up until (excluding) this position * * This function returns true if the there is a non-zero byte * in the buf in the range [from,to). */ static inline bool range_is_zero(void *buf, size_t from, size_t to) { return !memchr_inv((u8 *)buf + from, 0, to - from); } static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; if (!__skb) return 0; /* make sure the fields we don't use are zeroed */ if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark))) return -EINVAL; /* mark is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark), offsetof(struct __sk_buff, priority))) return -EINVAL; /* priority is allowed */ /* ingress_ifindex is allowed */ /* ifindex is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex), offsetof(struct __sk_buff, cb))) return -EINVAL; /* cb is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), offsetof(struct __sk_buff, tstamp))) return -EINVAL; /* tstamp is allowed */ /* wire_len is allowed */ /* gso_segs is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), offsetof(struct __sk_buff, gso_size))) return -EINVAL; /* gso_size is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), offsetof(struct __sk_buff, hwtstamp))) return -EINVAL; /* hwtstamp is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp), sizeof(struct __sk_buff))) return -EINVAL; skb->mark = __skb->mark; skb->priority = __skb->priority; skb->skb_iif = __skb->ingress_ifindex; skb->tstamp = __skb->tstamp; memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); if (__skb->wire_len == 0) { cb->pkt_len = skb->len; } else { if (__skb->wire_len < skb->len || __skb->wire_len > GSO_LEGACY_MAX_SIZE) return -EINVAL; cb->pkt_len = __skb->wire_len; } if (__skb->gso_segs > GSO_MAX_SEGS) return -EINVAL; skb_shinfo(skb)->gso_segs = __skb->gso_segs; skb_shinfo(skb)->gso_size = __skb->gso_size; skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp; return 0; } static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; if (!__skb) return; __skb->mark = skb->mark; __skb->priority = skb->priority; __skb->ingress_ifindex = skb->skb_iif; __skb->ifindex = skb->dev->ifindex; __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); __skb->wire_len = cb->pkt_len; __skb->gso_segs = skb_shinfo(skb)->gso_segs; __skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp; } static struct proto bpf_dummy_proto = { .name = "bpf_dummy", .owner = THIS_MODULE, .obj_size = sizeof(struct sock), }; int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; struct net *net = current->nsproxy->net_ns; struct net_device *dev = net->loopback_dev; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; u32 retval, duration; int hh_len = ETH_HLEN; struct sk_buff *skb; struct sock *sk; void *data; int ret; if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; if (size < ETH_HLEN) return -EINVAL; data = bpf_test_init(kattr, kattr->test.data_size_in, size, NET_SKB_PAD + NET_IP_ALIGN, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); if (IS_ERR(data)) return PTR_ERR(data); ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); if (IS_ERR(ctx)) { kfree(data); return PTR_ERR(ctx); } switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: is_l2 = true; fallthrough; case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_CGROUP_SKB: is_direct_pkt_access = true; break; default: break; } sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); if (!sk) { kfree(data); kfree(ctx); return -ENOMEM; } sock_init_data(NULL, sk); skb = slab_build_skb(data); if (!skb) { kfree(data); kfree(ctx); sk_free(sk); return -ENOMEM; } skb->sk = sk; skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); if (!dev) { ret = -ENODEV; goto out; } } skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); switch (skb->protocol) { case htons(ETH_P_IP): sk->sk_family = AF_INET; if (sizeof(struct iphdr) <= skb_headlen(skb)) { sk->sk_rcv_saddr = ip_hdr(skb)->saddr; sk->sk_daddr = ip_hdr(skb)->daddr; } break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): sk->sk_family = AF_INET6; if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) { sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr; sk->sk_v6_daddr = ipv6_hdr(skb)->daddr; } break; #endif default: break; } if (is_l2) __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { const int off = skb_network_offset(skb); int len = skb->len - off; skb->csum = skb_checksum(skb, off, len, 0); skb->ip_summed = CHECKSUM_COMPLETE; } ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; if (!is_l2) { if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { ret = -ENOMEM; goto out; } } memset(__skb_push(skb, hh_len), 0, hh_len); } if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { const int off = skb_network_offset(skb); int len = skb->len - off; __wsum csum; csum = skb_checksum(skb, off, len, 0); if (csum_fold(skb->csum) != csum_fold(csum)) { ret = -EBADMSG; goto out; } } convert_skb_to___skb(skb, ctx); size = skb->len; /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); out: if (dev && dev != net->loopback_dev) dev_put(dev); kfree_skb(skb); sk_free(sk); kfree(ctx); return ret; } static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) { unsigned int ingress_ifindex, rx_queue_index; struct netdev_rx_queue *rxqueue; struct net_device *device; if (!xdp_md) return 0; if (xdp_md->egress_ifindex != 0) return -EINVAL; ingress_ifindex = xdp_md->ingress_ifindex; rx_queue_index = xdp_md->rx_queue_index; if (!ingress_ifindex && rx_queue_index) return -EINVAL; if (ingress_ifindex) { device = dev_get_by_index(current->nsproxy->net_ns, ingress_ifindex); if (!device) return -ENODEV; if (rx_queue_index >= device->real_num_rx_queues) goto free_dev; rxqueue = __netif_get_rx_queue(device, rx_queue_index); if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) goto free_dev; xdp->rxq = &rxqueue->xdp_rxq; /* The device is now tracked in the xdp->rxq for later * dev_put() */ } xdp->data = xdp->data_meta + xdp_md->data; return 0; free_dev: dev_put(device); return -EINVAL; } static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) { if (!xdp_md) return; xdp_md->data = xdp->data - xdp->data_meta; xdp_md->data_end = xdp->data_end - xdp->data_meta; if (xdp_md->ingress_ifindex) dev_put(xdp->rxq->dev); } int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES); u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size; u32 linear_sz = kattr->test.data_size_in; u32 batch_size = kattr->test.batch_size; u32 headroom = XDP_PACKET_HEADROOM; u32 repeat = kattr->test.repeat; struct netdev_rx_queue *rxqueue; struct skb_shared_info *sinfo; struct xdp_buff xdp = {}; int i, ret = -EINVAL; struct xdp_md *ctx; void *data; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES) return -EINVAL; if (bpf_prog_is_dev_bound(prog->aux)) return -EINVAL; if (do_live) { if (!batch_size) batch_size = NAPI_POLL_WEIGHT; else if (batch_size > TEST_XDP_MAX_BATCH) return -E2BIG; headroom += sizeof(struct xdp_page_head); } else if (batch_size) { return -EINVAL; } ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (ctx) { /* There can't be user provided data before the meta data */ if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in || ctx->data > ctx->data_end || unlikely(xdp_metalen_invalid(ctx->data)) || (do_live && (kattr->test.data_out || kattr->test.ctx_out))) goto free_ctx; /* Meta data is allocated from the headroom */ headroom -= ctx->data; meta_sz = ctx->data; linear_sz = ctx->data_end; } max_linear_sz = PAGE_SIZE - headroom - tailroom; linear_sz = min_t(u32, linear_sz, max_linear_sz); /* disallow live data mode for jumbo frames */ if (do_live && kattr->test.data_size_in > linear_sz) goto free_ctx; if (kattr->test.data_size_in - meta_sz < ETH_HLEN) return -EINVAL; data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); if (IS_ERR(data)) { ret = PTR_ERR(data); goto free_ctx; } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); rxqueue->xdp_rxq.frag_size = PAGE_SIZE; xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, linear_sz, true); sinfo = xdp_get_shared_info_from_buff(&xdp); ret = xdp_convert_md_to_buff(ctx, &xdp); if (ret) goto free_data; size = linear_sz; if (unlikely(kattr->test.data_size_in > size)) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); while (size < kattr->test.data_size_in) { struct page *page; skb_frag_t *frag; u32 data_len; if (sinfo->nr_frags == MAX_SKB_FRAGS) { ret = -ENOMEM; goto out; } page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; goto out; } frag = &sinfo->frags[sinfo->nr_frags++]; data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); skb_frag_fill_page_desc(frag, page, 0, data_len); if (copy_from_user(page_address(page), data_in + size, data_len)) { ret = -EFAULT; goto out; } sinfo->xdp_frags_size += data_len; size += data_len; } xdp_buff_set_frags_flag(&xdp); } if (repeat > 1) bpf_prog_change_xdp(NULL, prog); if (do_live) ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration); else ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); /* We convert the xdp_buff back to an xdp_md before checking the return * code so the reference count of any held netdevice will be decremented * even if the test run failed. */ xdp_convert_buff_to_md(&xdp, ctx); if (ret) goto out; size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct xdp_md)); out: if (repeat > 1) bpf_prog_change_xdp(prog, NULL); free_data: for (i = 0; i < sinfo->nr_frags; i++) __free_page(skb_frag_page(&sinfo->frags[i])); kfree(data); free_ctx: kfree(ctx); return ret; } static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) { /* make sure the fields we don't use are zeroed */ if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) return -EINVAL; /* flags is allowed */ if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags), sizeof(struct bpf_flow_keys))) return -EINVAL; return 0; } int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { struct bpf_test_timer t = { NO_PREEMPT }; u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; const struct ethhdr *eth; unsigned int flags = 0; u32 retval, duration; void *data; int ret; if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; if (size < ETH_HLEN) return -EINVAL; data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); eth = (struct ethhdr *)data; if (!repeat) repeat = 1; user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); if (IS_ERR(user_ctx)) { kfree(data); return PTR_ERR(user_ctx); } if (user_ctx) { ret = verify_user_bpf_flow_keys(user_ctx); if (ret) goto out; flags = user_ctx->flags; } ctx.flow_keys = &flow_keys; ctx.data = data; ctx.data_end = (__u8 *)data + size; bpf_test_timer_enter(&t); do { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, size, flags); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); bpf_test_timer_leave(&t); if (ret < 0) goto out; ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, sizeof(flow_keys), retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(struct bpf_flow_keys)); out: kfree(user_ctx); kfree(data); return ret; } int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { struct bpf_test_timer t = { NO_PREEMPT }; struct bpf_prog_array *progs = NULL; struct bpf_sk_lookup_kern ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_sk_lookup *user_ctx; u32 retval, duration; int ret = -EINVAL; if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || kattr->test.data_size_out) return -EINVAL; if (!repeat) repeat = 1; user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx)); if (IS_ERR(user_ctx)) return PTR_ERR(user_ctx); if (!user_ctx) return -EINVAL; if (user_ctx->sk) goto out; if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: ctx.v4.daddr = (__force __be32)user_ctx->local_ip4; ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6; ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6; break; #endif default: ret = -EAFNOSUPPORT; goto out; } progs = bpf_prog_array_alloc(1, GFP_KERNEL); if (!progs) { ret = -ENOMEM; goto out; } progs->items[0].prog = prog; bpf_test_timer_enter(&t); do { ctx.selected_sk = NULL; retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); bpf_test_timer_leave(&t); if (ret < 0) goto out; user_ctx->cookie = 0; if (ctx.selected_sk) { if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) { ret = -EOPNOTSUPP; goto out; } user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); } ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); out: bpf_prog_array_free(progs); kfree(user_ctx); return ret; } int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); __u32 ctx_size_in = kattr->test.ctx_size_in; void *ctx = NULL; u32 retval; int err = 0; /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */ if (kattr->test.data_in || kattr->test.data_out || kattr->test.ctx_out || kattr->test.duration || kattr->test.repeat || kattr->test.flags || kattr->test.batch_size) return -EINVAL; if (ctx_size_in < prog->aux->max_ctx_offset || ctx_size_in > U16_MAX) return -EINVAL; if (ctx_size_in) { ctx = memdup_user(ctx_in, ctx_size_in); if (IS_ERR(ctx)) return PTR_ERR(ctx); } rcu_read_lock_trace(); retval = bpf_prog_run_pin_on_cpu(prog, ctx); rcu_read_unlock_trace(); if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) { err = -EFAULT; goto out; } if (ctx_size_in) if (copy_to_user(ctx_in, ctx, ctx_size_in)) err = -EFAULT; out: kfree(ctx); return err; } static int verify_and_copy_hook_state(struct nf_hook_state *state, |