Total coverage: 307466 (14%) of 2278589
267 266 230 105 105 1 1 43 44 66 1 51 56 6 5 5 7 3 1 2 1 900 886 1 1 6 7 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/types.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/tty.h> #include <linux/fcntl.h> #include <linux/uaccess.h> #include "tty.h" static int is_ignored(int sig) { return (sigismember(&current->blocked, sig) || current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); } /** * __tty_check_change - check for POSIX terminal changes * @tty: tty to check * @sig: signal to send * * If we try to write to, or set the state of, a terminal and we're * not in the foreground, send a SIGTTOU. If the signal is blocked or * ignored, go ahead and perform the operation. 
(POSIX 7.2) * * Locking: ctrl.lock */ int __tty_check_change(struct tty_struct *tty, int sig) { unsigned long flags; struct pid *pgrp, *tty_pgrp; int ret = 0; if (current->signal->tty != tty) return 0; rcu_read_lock(); pgrp = task_pgrp(current); spin_lock_irqsave(&tty->ctrl.lock, flags); tty_pgrp = tty->ctrl.pgrp; spin_unlock_irqrestore(&tty->ctrl.lock, flags); if (tty_pgrp && pgrp != tty_pgrp) { if (is_ignored(sig)) { if (sig == SIGTTIN) ret = -EIO; } else if (is_current_pgrp_orphaned()) ret = -EIO; else { kill_pgrp(pgrp, sig, 1); set_thread_flag(TIF_SIGPENDING); ret = -ERESTARTSYS; } } rcu_read_unlock(); if (!tty_pgrp) tty_warn(tty, "sig=%d, tty->pgrp == NULL!\n", sig); return ret; } int tty_check_change(struct tty_struct *tty) { return __tty_check_change(tty, SIGTTOU); } EXPORT_SYMBOL(tty_check_change); void proc_clear_tty(struct task_struct *p) { unsigned long flags; struct tty_struct *tty; spin_lock_irqsave(&p->sighand->siglock, flags); tty = p->signal->tty; p->signal->tty = NULL; spin_unlock_irqrestore(&p->sighand->siglock, flags); tty_kref_put(tty); } /** * __proc_set_tty - set the controlling terminal * @tty: tty structure * * Only callable by the session leader and only if it does not already have * a controlling terminal. * * Caller must hold: tty_lock() * a readlock on tasklist_lock * sighand lock */ static void __proc_set_tty(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->ctrl.lock, flags); /* * The session and fg pgrp references will be non-NULL if * tiocsctty() is stealing the controlling tty */ put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.pgrp = get_pid(task_pgrp(current)); tty->ctrl.session = get_pid(task_session(current)); spin_unlock_irqrestore(&tty->ctrl.lock, flags); if (current->signal->tty) { tty_debug(tty, "current tty %s not NULL!!\n", current->signal->tty->name); tty_kref_put(current->signal->tty); } put_pid(current->signal->tty_old_pgrp); current->signal->tty = tty_kref_get(tty); current->signal->tty_old_pgrp = NULL; } static void proc_set_tty(struct tty_struct *tty) { spin_lock_irq(&current->sighand->siglock); __proc_set_tty(tty); spin_unlock_irq(&current->sighand->siglock); } /* * Called by tty_open() to set the controlling tty if applicable. */ void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty) { read_lock(&tasklist_lock); spin_lock_irq(&current->sighand->siglock); if (current->signal->leader && !current->signal->tty && tty->ctrl.session == NULL) { /* * Don't let a process that only has write access to the tty * obtain the privileges associated with having a tty as * controlling terminal (being able to reopen it with full * access through /dev/tty, being able to perform pushback). * Many distributions set the group of all ttys to "tty" and * grant write-only access to all terminals for setgid tty * binaries, which should not imply full privileges on all ttys. * * This could theoretically break old code that performs open() * on a write-only file descriptor. In that case, it might be * necessary to also permit this if * inode_permission(inode, MAY_READ) == 0. 
*/ if (filp->f_mode & FMODE_READ) __proc_set_tty(tty); } spin_unlock_irq(&current->sighand->siglock); read_unlock(&tasklist_lock); } struct tty_struct *get_current_tty(void) { struct tty_struct *tty; unsigned long flags; spin_lock_irqsave(&current->sighand->siglock, flags); tty = tty_kref_get(current->signal->tty); spin_unlock_irqrestore(&current->sighand->siglock, flags); return tty; } EXPORT_SYMBOL_GPL(get_current_tty); /* * Called from tty_release(). */ void session_clear_tty(struct pid *session) { struct task_struct *p; do_each_pid_task(session, PIDTYPE_SID, p) { proc_clear_tty(p); } while_each_pid_task(session, PIDTYPE_SID, p); } /** * tty_signal_session_leader - sends SIGHUP to session leader * @tty: controlling tty * @exit_session: if non-zero, signal all foreground group processes * * Send SIGHUP and SIGCONT to the session leader and its process group. * Optionally, signal all processes in the foreground process group. * * Returns the number of processes in the session with this tty * as their controlling terminal. This value is used to drop * tty references for those processes. */ int tty_signal_session_leader(struct tty_struct *tty, int exit_session) { struct task_struct *p; int refs = 0; struct pid *tty_pgrp = NULL; read_lock(&tasklist_lock); if (tty->ctrl.session) { do_each_pid_task(tty->ctrl.session, PIDTYPE_SID, p) { spin_lock_irq(&p->sighand->siglock); if (p->signal->tty == tty) { p->signal->tty = NULL; /* * We defer the dereferences outside of * the tasklist lock. */ refs++; } if (!p->signal->leader) { spin_unlock_irq(&p->sighand->siglock); continue; } send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); put_pid(p->signal->tty_old_pgrp); /* A noop */ spin_lock(&tty->ctrl.lock); tty_pgrp = get_pid(tty->ctrl.pgrp); if (tty->ctrl.pgrp) p->signal->tty_old_pgrp = get_pid(tty->ctrl.pgrp); spin_unlock(&tty->ctrl.lock); spin_unlock_irq(&p->sighand->siglock); } while_each_pid_task(tty->ctrl.session, PIDTYPE_SID, p); } read_unlock(&tasklist_lock); if (tty_pgrp) { if (exit_session) kill_pgrp(tty_pgrp, SIGHUP, exit_session); put_pid(tty_pgrp); } return refs; } /** * disassociate_ctty - disconnect controlling tty * @on_exit: true if exiting so need to "hang up" the session * * This function is typically called only by the session leader, when * it wants to disassociate itself from its controlling tty. * * It performs the following functions: * (1) Sends a SIGHUP and SIGCONT to the foreground process group * (2) Clears the tty from being controlling the session * (3) Clears the controlling tty for all processes in the * session group. * * The argument on_exit is set to 1 if called when a process is * exiting; it is 0 if called by the ioctl TIOCNOTTY. * * Locking: * BTM is taken for hysterical raisons, and held when * called from no_tty(). 
* tty_mutex is taken to protect tty * ->siglock is taken to protect ->signal/->sighand * tasklist_lock is taken to walk process list for sessions * ->siglock is taken to protect ->signal/->sighand */ void disassociate_ctty(int on_exit) { struct tty_struct *tty; if (!current->signal->leader) return; tty = get_current_tty(); if (tty) { if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) { tty_vhangup_session(tty); } else { struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, SIGHUP, on_exit); if (!on_exit) kill_pgrp(tty_pgrp, SIGCONT, on_exit); put_pid(tty_pgrp); } } tty_kref_put(tty); } else if (on_exit) { struct pid *old_pgrp; spin_lock_irq(&current->sighand->siglock); old_pgrp = current->signal->tty_old_pgrp; current->signal->tty_old_pgrp = NULL; spin_unlock_irq(&current->sighand->siglock); if (old_pgrp) { kill_pgrp(old_pgrp, SIGHUP, on_exit); kill_pgrp(old_pgrp, SIGCONT, on_exit); put_pid(old_pgrp); } return; } tty = get_current_tty(); if (tty) { unsigned long flags; tty_lock(tty); spin_lock_irqsave(&tty->ctrl.lock, flags); put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; spin_unlock_irqrestore(&tty->ctrl.lock, flags); tty_unlock(tty); tty_kref_put(tty); } /* If tty->ctrl.pgrp is not NULL, it may be assigned to * current->signal->tty_old_pgrp in a race condition, and * cause pid memleak. Release current->signal->tty_old_pgrp * after tty->ctrl.pgrp set to NULL. */ spin_lock_irq(&current->sighand->siglock); put_pid(current->signal->tty_old_pgrp); current->signal->tty_old_pgrp = NULL; spin_unlock_irq(&current->sighand->siglock); /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); session_clear_tty(task_session(current)); read_unlock(&tasklist_lock); } /* * * no_tty - Ensure the current process does not have a controlling tty */ void no_tty(void) { /* * FIXME: Review locking here. The tty_lock never covered any race * between a new association and proc_clear_tty but possibly we need * to protect against this anyway. */ struct task_struct *tsk = current; disassociate_ctty(0); proc_clear_tty(tsk); } /** * tiocsctty - set controlling tty * @tty: tty structure * @file: file structure used to check permissions * @arg: user argument * * This ioctl is used to manage job control. It permits a session * leader to set this tty as the controlling tty for the session. * * Locking: * Takes tty_lock() to serialize proc_set_tty() for this tty * Takes tasklist_lock internally to walk sessions * Takes ->siglock() when updating signal->tty */ static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) { int ret = 0; tty_lock(tty); read_lock(&tasklist_lock); if (current->signal->leader && task_session(current) == tty->ctrl.session) goto unlock; /* * The process must be a session leader and * not have a controlling tty already. */ if (!current->signal->leader || current->signal->tty) { ret = -EPERM; goto unlock; } if (tty->ctrl.session) { /* * This tty is already the controlling * tty for another session group! */ if (arg == 1 && capable(CAP_SYS_ADMIN)) { /* * Steal it away */ session_clear_tty(tty->ctrl.session); } else { ret = -EPERM; goto unlock; } } /* See the comment in tty_open_proc_set_tty(). 
*/ if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto unlock; } proc_set_tty(tty); unlock: read_unlock(&tasklist_lock); tty_unlock(tty); return ret; } /** * tty_get_pgrp - return a ref counted pgrp pid * @tty: tty to read * * Returns a refcounted instance of the pid struct for the process * group controlling the tty. */ struct pid *tty_get_pgrp(struct tty_struct *tty) { unsigned long flags; struct pid *pgrp; spin_lock_irqsave(&tty->ctrl.lock, flags); pgrp = get_pid(tty->ctrl.pgrp); spin_unlock_irqrestore(&tty->ctrl.lock, flags); return pgrp; } EXPORT_SYMBOL_GPL(tty_get_pgrp); /* * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. I dunno - gdb doesn't work correctly * without this... * * The caller must hold rcu lock or the tasklist lock. */ static struct pid *session_of_pgrp(struct pid *pgrp) { struct task_struct *p; struct pid *sid = NULL; p = pid_task(pgrp, PIDTYPE_PGID); if (p == NULL) p = pid_task(pgrp, PIDTYPE_PID); if (p != NULL) sid = task_session(p); return sid; } /** * tiocgpgrp - get process group * @tty: tty passed by user * @real_tty: tty side of the tty passed by the user if a pty else the tty * @p: returned pid * * Obtain the process group of the tty. If there is no process group * return an error. * * Locking: none. Reference to current->signal->tty is safe. */ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { struct pid *pid; int ret; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; pid = tty_get_pgrp(real_tty); ret = put_user(pid_vnr(pid), p); put_pid(pid); return ret; } /** * tiocspgrp - attempt to set process group * @tty: tty passed by user * @real_tty: tty side device matching tty passed by user * @p: pid pointer * * Set the process group of the tty to the session passed. Only * permitted where the tty session is our session. * * Locking: RCU, ctrl lock */ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { struct pid *pgrp; pid_t pgrp_nr; int retval = tty_check_change(real_tty); if (retval == -EIO) return -ENOTTY; if (retval) return retval; if (get_user(pgrp_nr, p)) return -EFAULT; if (pgrp_nr < 0) return -EINVAL; spin_lock_irq(&real_tty->ctrl.lock); if (!current->signal->tty || (current->signal->tty != real_tty) || (real_tty->ctrl.session != task_session(current))) { retval = -ENOTTY; goto out_unlock_ctrl; } rcu_read_lock(); pgrp = find_vpid(pgrp_nr); retval = -ESRCH; if (!pgrp) goto out_unlock; retval = -EPERM; if (session_of_pgrp(pgrp) != task_session(current)) goto out_unlock; retval = 0; put_pid(real_tty->ctrl.pgrp); real_tty->ctrl.pgrp = get_pid(pgrp); out_unlock: rcu_read_unlock(); out_unlock_ctrl: spin_unlock_irq(&real_tty->ctrl.lock); return retval; } /** * tiocgsid - get session id * @tty: tty passed by user * @real_tty: tty side of the tty passed by the user if a pty else the tty * @p: pointer to returned session id * * Obtain the session id of the tty. If there is no session * return an error. */ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { unsigned long flags; pid_t sid; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. 
*/ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; spin_lock_irqsave(&real_tty->ctrl.lock, flags); if (!real_tty->ctrl.session) goto err; sid = pid_vnr(real_tty->ctrl.session); spin_unlock_irqrestore(&real_tty->ctrl.lock, flags); return put_user(sid, p); err: spin_unlock_irqrestore(&real_tty->ctrl.lock, flags); return -ENOTTY; } /* * Called from tty_ioctl(). If tty is a pty then real_tty is the slave side, * if not then tty == real_tty. */ long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, struct file *file, unsigned int cmd, unsigned long arg) { void __user *p = (void __user *)arg; switch (cmd) { case TIOCNOTTY: if (current->signal->tty != tty) return -ENOTTY; no_tty(); return 0; case TIOCSCTTY: return tiocsctty(real_tty, file, arg); case TIOCGPGRP: return tiocgpgrp(tty, real_tty, p); case TIOCSPGRP: return tiocspgrp(tty, real_tty, p); case TIOCGSID: return tiocgsid(tty, real_tty, p); } return -ENOIOCTLCMD; }
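The tty_jobctrl_ioctl() dispatcher above is the kernel side of POSIX job control. As a hedged illustration (not part of this file), the following minimal userspace sketch shows how a freshly created session leader might acquire a controlling terminal with TIOCSCTTY and then read the foreground process group with TIOCGPGRP; the device path /dev/ttyS0 and the omitted error recovery are assumptions made only for brevity.

/*
 * Illustrative userspace sketch only: become a session leader, adopt an
 * example tty as the controlling terminal (handled by tiocsctty() above),
 * then query its foreground process group (handled by tiocgpgrp() above).
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	pid_t pgrp;
	int fd;

	if (setsid() < 0)	/* new session; fails if already a pgrp leader */
		perror("setsid");

	fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);	/* example device path */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, TIOCSCTTY, 0) < 0)	/* arg 0: do not steal the tty */
		perror("TIOCSCTTY");

	if (ioctl(fd, TIOCGPGRP, &pgrp) < 0)	/* foreground process group */
		perror("TIOCGPGRP");
	else
		printf("foreground pgrp: %d\n", (int)pgrp);

	close(fd);
	return 0;
}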
11 2 1 3 5 2 3 3 3 1 1 1 1 4 2 29 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_connmark.c netfilter connmark retriever action * skb mark is over-written * * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_connmark.h> #include <net/tc_act/tc_connmark.h> #include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_connmark_ops; TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = to_connmark(a); struct tcf_connmark_parms *parms; struct nf_conntrack_zone zone; struct nf_conn *c; int proto; tcf_lastuse_update(&ca->tcf_tm); tcf_action_update_bstats(&ca->common, skb); parms = rcu_dereference_bh(ca->parms); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; break; case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; break; default: goto out; } c = nf_ct_get(skb, &ctinfo); if (c) { skb->mark = READ_ONCE(c->mark); goto count; } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net, &tuple)) goto out; zone.id = parms->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; thash = nf_conntrack_find_get(parms->net, &zone, &tuple); if (!thash) goto out; c = nf_ct_tuplehash_to_ctrack(thash); skb->mark = READ_ONCE(c->mark); nf_ct_put(c); count: /* using overlimits stats to count how many packets marked */ tcf_action_inc_overlimit_qstats(&ca->common); out: return READ_ONCE(ca->tcf_action); } static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, }; static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); struct tcf_connmark_parms *nparms, *oparms; 
struct nlattr *tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; int ret = 0, err; u32 index; if (!nla) return -EINVAL; ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla, connmark_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_CONNMARK_PARMS]) return -EINVAL; nparms = kzalloc(sizeof(*nparms), GFP_KERNEL); if (!nparms) return -ENOMEM; parm = nla_data(tb[TCA_CONNMARK_PARMS]); index = parm->index; ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_connmark_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); err = ret; goto out_free; } ci = to_connmark(*a); nparms->net = net; nparms->zone = parm->zone; ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); if (bind) { err = ACT_P_BOUND; goto out_free; } if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } nparms->net = rtnl_dereference(ci->parms)->net; nparms->zone = parm->zone; ret = 0; } else { err = ret; goto out_free; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; spin_lock_bh(&ci->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock)); spin_unlock_bh(&ci->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (oparms) kfree_rcu(oparms, rcu); return ret; release_idr: tcf_idr_release(*a, bind); out_free: kfree(nparms); return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_connmark_info *ci = to_connmark(a); struct tc_connmark opt = { .index = ci->tcf_index, .refcnt = refcount_read(&ci->tcf_refcnt) - ref, .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, }; struct tcf_connmark_parms *parms; struct tcf_t t; spin_lock_bh(&ci->tcf_lock); parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock)); opt.action = ci->tcf_action; opt.zone = parms->zone; if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &ci->tcf_tm); if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, TCA_CONNMARK_PAD)) goto nla_put_failure; spin_unlock_bh(&ci->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&ci->tcf_lock); nlmsg_trim(skb, b); return -1; } static void tcf_connmark_cleanup(struct tc_action *a) { struct tcf_connmark_info *ci = to_connmark(a); struct tcf_connmark_parms *parms; parms = rcu_dereference_protected(ci->parms, 1); if (parms) kfree_rcu(parms, rcu); } static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, .cleanup = tcf_connmark_cleanup, .size = sizeof(struct tcf_connmark_info), }; MODULE_ALIAS_NET_ACT("connmark"); static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_connmark_ops.net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, .exit_batch = connmark_exit_net, .id = &act_connmark_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init connmark_init_module(void) { return 
tcf_register_action(&act_connmark_ops, &connmark_net_ops); } static void __exit connmark_cleanup_module(void) { tcf_unregister_action(&act_connmark_ops, &connmark_net_ops); } module_init(connmark_init_module); module_exit(connmark_cleanup_module); MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); MODULE_DESCRIPTION("Connection tracking mark restoring"); MODULE_LICENSE("GPL");
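act_connmark restores the conntrack mark onto skb->mark so that ingress tc filters can classify on it; the action itself is normally attached from userspace with the tc tool. As a hedged, tangential illustration of where such a mark can originate (an assumed deployment, not anything this module defines), the sketch below sets an fwmark on a socket with the Linux-specific SO_MARK option; a separate netfilter rule that saves the packet mark into the conntrack entry is assumed and not shown.

/*
 * Illustrative userspace sketch only: tag a socket's traffic with an fwmark.
 * With an assumed (not shown) netfilter rule copying the packet mark into
 * the conntrack entry, the connmark action above can later restore it onto
 * ingress packets. Setting SO_MARK requires CAP_NET_ADMIN.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	unsigned int mark = 0x2a;	/* example mark value */
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
		perror("SO_MARK");
	else
		printf("socket fwmark set to 0x%x\n", mark);

	close(fd);
	return 0;
}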
2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2000-2001 Vojtech Pavlik * Copyright (c) 2006-2010 Jiri Kosina * * HID to Linux Input mapping */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #include <linux/module.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/hid.h> #include <linux/hid-debug.h> #include "hid-ids.h" #define unk KEY_UNKNOWN static const unsigned char hid_keyboard[256] = { 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 1, 14, 15, 57, 12, 13, 26, 27, 43, 43, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106, 105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71, 72, 73, 82, 83, 86,127,116,117,183,184,185,186,187,188,189,190, 191,192,193,194,134,138,130,132,128,129,131,137,133,135,136,113, 115,114,unk,unk,unk,121,unk, 89, 93,124, 92, 94, 95,unk,unk,unk, 122,123, 90, 91, 85,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,179,180,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk,unk,unk,unk,unk, 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113, 150,158,159,128,136,177,178,176,142,152,173,140,unk,unk,unk,unk }; static const struct { __s32 x; __s32 y; } hid_hat_to_axis[] = {{ 0, 0}, { 0,-1}, { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, {-1, 1}, {-1, 0}, {-1,-1}}; struct usage_priority { __u32 usage; /* the HID usage associated */ bool global; /* we assume all usages to be slotted, * unless global */ unsigned int slot_overwrite; /* for globals: allows to set the usage * before or after the slots */ }; /* * hid-input will convert this list into priorities: * the first element will have the highest priority * (the length of the following array) and the last * element the lowest (1). * * hid-input will then shift the priority by 8 bits to leave some space * in case drivers want to interleave other fields. * * To accommodate slotted devices, the slot priority is * defined in the next 8 bits (defined by 0xff - slot). * * If drivers want to add fields before those, hid-input will * leave out the first 8 bits of the priority value. * * This still leaves us 65535 individual priority values. */ static const struct usage_priority hidinput_usages_priorities[] = { { /* Eraser (eraser touching) must always come before tipswitch */ .usage = HID_DG_ERASER, }, { /* Invert must always come before In Range */ .usage = HID_DG_INVERT, }, { /* Is the tip of the tool touching? 
*/ .usage = HID_DG_TIPSWITCH, }, { /* Tip Pressure might emulate tip switch */ .usage = HID_DG_TIPPRESSURE, }, { /* In Range needs to come after the other tool states */ .usage = HID_DG_INRANGE, }, }; #define map_abs(c) hid_map_usage(hidinput, usage, &bit, &max, EV_ABS, (c)) #define map_rel(c) hid_map_usage(hidinput, usage, &bit, &max, EV_REL, (c)) #define map_key(c) hid_map_usage(hidinput, usage, &bit, &max, EV_KEY, (c)) #define map_led(c) hid_map_usage(hidinput, usage, &bit, &max, EV_LED, (c)) #define map_msc(c) hid_map_usage(hidinput, usage, &bit, &max, EV_MSC, (c)) #define map_abs_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_ABS, (c)) #define map_key_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_KEY, (c)) static bool match_scancode(struct hid_usage *usage, unsigned int cur_idx, unsigned int scancode) { return (usage->hid & (HID_USAGE_PAGE | HID_USAGE)) == scancode; } static bool match_keycode(struct hid_usage *usage, unsigned int cur_idx, unsigned int keycode) { /* * We should exclude unmapped usages when doing lookup by keycode. */ return (usage->type == EV_KEY && usage->code == keycode); } static bool match_index(struct hid_usage *usage, unsigned int cur_idx, unsigned int idx) { return cur_idx == idx; } typedef bool (*hid_usage_cmp_t)(struct hid_usage *usage, unsigned int cur_idx, unsigned int val); static struct hid_usage *hidinput_find_key(struct hid_device *hid, hid_usage_cmp_t match, unsigned int value, unsigned int *usage_idx) { unsigned int i, j, k, cur_idx = 0; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; if (usage->type == EV_KEY || usage->type == 0) { if (match(usage, cur_idx, value)) { if (usage_idx) *usage_idx = cur_idx; return usage; } cur_idx++; } } } } } return NULL; } static struct hid_usage *hidinput_locate_usage(struct hid_device *hid, const struct input_keymap_entry *ke, unsigned int *index) { struct hid_usage *usage; unsigned int scancode; if (ke->flags & INPUT_KEYMAP_BY_INDEX) usage = hidinput_find_key(hid, match_index, ke->index, index); else if (input_scancode_to_scalar(ke, &scancode) == 0) usage = hidinput_find_key(hid, match_scancode, scancode, index); else usage = NULL; return usage; } static int hidinput_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; unsigned int scancode, index; usage = hidinput_locate_usage(hid, ke, &index); if (usage) { ke->keycode = usage->type == EV_KEY ? usage->code : KEY_RESERVED; ke->index = index; scancode = usage->hid & (HID_USAGE_PAGE | HID_USAGE); ke->len = sizeof(scancode); memcpy(ke->scancode, &scancode, sizeof(scancode)); return 0; } return -EINVAL; } static int hidinput_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; usage = hidinput_locate_usage(hid, ke, NULL); if (usage) { *old_keycode = usage->type == EV_KEY ? 
usage->code : KEY_RESERVED; usage->type = EV_KEY; usage->code = ke->keycode; clear_bit(*old_keycode, dev->keybit); set_bit(usage->code, dev->keybit); dbg_hid("Assigned keycode %d to HID usage code %x\n", usage->code, usage->hid); /* * Set the keybit for the old keycode if the old keycode is used * by another key */ if (hidinput_find_key(hid, match_keycode, *old_keycode, NULL)) set_bit(*old_keycode, dev->keybit); return 0; } return -EINVAL; } /** * hidinput_calc_abs_res - calculate an absolute axis resolution * @field: the HID report field to calculate resolution for * @code: axis code * * The formula is: * (logical_maximum - logical_minimum) * resolution = ---------------------------------------------------------- * (physical_maximum - physical_minimum) * 10 ^ unit_exponent * * as seen in the HID specification v1.11 6.2.2.7 Global Items. * * Only exponent 1 length units are processed. Centimeters and inches are * converted to millimeters. Degrees are converted to radians. */ __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code) { __s32 unit_exponent = field->unit_exponent; __s32 logical_extents = field->logical_maximum - field->logical_minimum; __s32 physical_extents = field->physical_maximum - field->physical_minimum; __s32 prev; /* Check if the extents are sane */ if (logical_extents <= 0 || physical_extents <= 0) return 0; /* * Verify and convert units. * See HID specification v1.11 6.2.2.7 Global Items for unit decoding */ switch (code) { case ABS_X: case ABS_Y: case ABS_Z: case ABS_MT_POSITION_X: case ABS_MT_POSITION_Y: case ABS_MT_TOOL_X: case ABS_MT_TOOL_Y: case ABS_MT_TOUCH_MAJOR: case ABS_MT_TOUCH_MINOR: if (field->unit == 0x11) { /* If centimeters */ /* Convert to millimeters */ unit_exponent += 1; } else if (field->unit == 0x13) { /* If inches */ /* Convert to millimeters */ prev = physical_extents; physical_extents *= 254; if (physical_extents < prev) return 0; unit_exponent -= 1; } else { return 0; } break; case ABS_RX: case ABS_RY: case ABS_RZ: case ABS_WHEEL: case ABS_TILT_X: case ABS_TILT_Y: if (field->unit == 0x14) { /* If degrees */ /* Convert to radians */ prev = logical_extents; logical_extents *= 573; if (logical_extents < prev) return 0; unit_exponent += 1; } else if (field->unit != 0x12) { /* If not radians */ return 0; } break; default: return 0; } /* Apply negative unit exponent */ for (; unit_exponent < 0; unit_exponent++) { prev = logical_extents; logical_extents *= 10; if (logical_extents < prev) return 0; } /* Apply positive unit exponent */ for (; unit_exponent > 0; unit_exponent--) { prev = physical_extents; physical_extents *= 10; if (physical_extents < prev) return 0; } /* Calculate resolution */ return DIV_ROUND_CLOSEST(logical_extents, physical_extents); } EXPORT_SYMBOL_GPL(hidinput_calc_abs_res); #ifdef CONFIG_HID_BATTERY_STRENGTH static enum power_supply_property hidinput_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, }; #define HID_BATTERY_QUIRK_PERCENT (1 << 0) /* always reports percent */ #define HID_BATTERY_QUIRK_FEATURE (1 << 1) /* ask for feature report */ #define HID_BATTERY_QUIRK_IGNORE (1 << 2) /* completely ignore the battery */ #define HID_BATTERY_QUIRK_AVOID_QUERY (1 << 3) /* do not query the battery */ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), HID_BATTERY_QUIRK_PERCENT | 
HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_3), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_CHROMEBOOK_TROGDOR_POMPOM), HID_BATTERY_QUIRK_AVOID_QUERY }, /* * Elan I2C-HID touchscreens seem to all report a non present battery, * set HID_BATTERY_QUIRK_IGNORE for all Elan I2C-HID devices. 
*/ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, {} }; static unsigned find_battery_quirk(struct hid_device *hdev) { unsigned quirks = 0; const struct hid_device_id *match; match = hid_match_id(hdev, hid_battery_quirks); if (match != NULL) quirks = match->driver_data; return quirks; } static int hidinput_scale_battery_capacity(struct hid_device *dev, int value) { if (dev->battery_min < dev->battery_max && value >= dev->battery_min && value <= dev->battery_max) value = ((value - dev->battery_min) * 100) / (dev->battery_max - dev->battery_min); return value; } static int hidinput_query_battery_capacity(struct hid_device *dev) { u8 *buf; int ret; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 4, dev->battery_report_type, HID_REQ_GET_REPORT); if (ret < 2) { kfree(buf); return -ENODATA; } ret = hidinput_scale_battery_capacity(dev, buf[1]); kfree(buf); return ret; } static int hidinput_get_battery_property(struct power_supply *psy, enum power_supply_property prop, union power_supply_propval *val) { struct hid_device *dev = power_supply_get_drvdata(psy); int value; int ret = 0; switch (prop) { case POWER_SUPPLY_PROP_PRESENT: case POWER_SUPPLY_PROP_ONLINE: val->intval = 1; break; case POWER_SUPPLY_PROP_CAPACITY: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; } else { value = dev->battery_capacity; } val->intval = value; break; case POWER_SUPPLY_PROP_MODEL_NAME: val->strval = dev->name; break; case POWER_SUPPLY_PROP_STATUS: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; dev->battery_capacity = value; dev->battery_status = HID_BATTERY_QUERIED; } if (dev->battery_status == HID_BATTERY_UNKNOWN) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; else val->intval = dev->battery_charge_status; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; default: ret = -EINVAL; break; } return ret; } static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { struct power_supply_desc *psy_desc; struct power_supply_config psy_cfg = { .drv_data = dev, }; unsigned quirks; s32 min, max; int error; if (dev->battery) return 0; /* already initialized? */ quirks = find_battery_quirk(dev); hid_dbg(dev, "device %x:%x:%x %d quirks %d\n", dev->bus, dev->vendor, dev->product, dev->version, quirks); if (quirks & HID_BATTERY_QUIRK_IGNORE) return 0; psy_desc = kzalloc(sizeof(*psy_desc), GFP_KERNEL); if (!psy_desc) return -ENOMEM; psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", strlen(dev->uniq) ? 
dev->uniq : dev_name(&dev->dev)); if (!psy_desc->name) { error = -ENOMEM; goto err_free_mem; } psy_desc->type = POWER_SUPPLY_TYPE_BATTERY; psy_desc->properties = hidinput_battery_props; psy_desc->num_properties = ARRAY_SIZE(hidinput_battery_props); psy_desc->use_for_apm = 0; psy_desc->get_property = hidinput_get_battery_property; min = field->logical_minimum; max = field->logical_maximum; if (is_percentage || (quirks & HID_BATTERY_QUIRK_PERCENT)) { min = 0; max = 100; } if (quirks & HID_BATTERY_QUIRK_FEATURE) report_type = HID_FEATURE_REPORT; dev->battery_min = min; dev->battery_max = max; dev->battery_report_type = report_type; dev->battery_report_id = field->report->id; dev->battery_charge_status = POWER_SUPPLY_STATUS_DISCHARGING; /* * Stylus is normally not connected to the device and thus we * can't query the device and get meaningful battery strength. * We have to wait for the device to report it on its own. */ dev->battery_avoid_query = report_type == HID_INPUT_REPORT && field->physical == HID_DG_STYLUS; if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY) dev->battery_avoid_query = true; dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); hid_warn(dev, "can't register power supply: %d\n", error); goto err_free_name; } power_supply_powers(dev->battery, &dev->dev); return 0; err_free_name: kfree(psy_desc->name); err_free_mem: kfree(psy_desc); dev->battery = NULL; return error; } static void hidinput_cleanup_battery(struct hid_device *dev) { const struct power_supply_desc *psy_desc; if (!dev->battery) return; psy_desc = dev->battery->desc; power_supply_unregister(dev->battery); kfree(psy_desc->name); kfree(psy_desc); dev->battery = NULL; } static void hidinput_update_battery(struct hid_device *dev, int value) { int capacity; if (!dev->battery) return; if (value == 0 || value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); if (dev->battery_status != HID_BATTERY_REPORTED || capacity != dev->battery_capacity || ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) { dev->battery_capacity = capacity; dev->battery_status = HID_BATTERY_REPORTED; dev->battery_ratelimit_time = ktime_add_ms(ktime_get_coarse(), 30 * 1000); power_supply_changed(dev->battery); } } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { switch (usage) { case HID_BAT_CHARGING: dev->battery_charge_status = value ? 
POWER_SUPPLY_STATUS_CHARGING : POWER_SUPPLY_STATUS_DISCHARGING; return true; } return false; } #else /* !CONFIG_HID_BATTERY_STRENGTH */ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { return 0; } static void hidinput_cleanup_battery(struct hid_device *dev) { } static void hidinput_update_battery(struct hid_device *dev, int value) { } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { return false; } #endif /* CONFIG_HID_BATTERY_STRENGTH */ static bool hidinput_field_in_collection(struct hid_device *device, struct hid_field *field, unsigned int type, unsigned int usage) { struct hid_collection *collection; collection = &device->collection[field->usage->collection_index]; return collection->type == type && collection->usage == usage; } static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned int usage_index) { struct input_dev *input = hidinput->input; struct hid_device *device = input_get_drvdata(input); const struct usage_priority *usage_priority = NULL; int max = 0, code; unsigned int i = 0; unsigned long *bit = NULL; field->hidinput = hidinput; if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; /* Ignore if report count is out of bounds. */ if (field->report_count < 1) goto ignore; /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { goto ignore; } /* assign a priority based on the static list declared here */ for (i = 0; i < ARRAY_SIZE(hidinput_usages_priorities); i++) { if (usage->hid == hidinput_usages_priorities[i].usage) { usage_priority = &hidinput_usages_priorities[i]; field->usages_priorities[usage_index] = (ARRAY_SIZE(hidinput_usages_priorities) - i) << 8; break; } } /* * For slotted devices, we need to also add the slot index * in the priority. 
*/ if (usage_priority && usage_priority->global) field->usages_priorities[usage_index] |= usage_priority->slot_overwrite; else field->usages_priorities[usage_index] |= (0xff - field->slot_idx) << 16; if (device->driver->input_mapping) { int ret = device->driver->input_mapping(device, hidinput, field, usage, &bit, &max); if (ret > 0) goto mapped; if (ret < 0) goto ignore; } switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_UNDEFINED: goto ignore; case HID_UP_KEYBOARD: set_bit(EV_REP, input->evbit); if ((usage->hid & HID_USAGE) < 256) { if (!hid_keyboard[usage->hid & HID_USAGE]) goto ignore; map_key_clear(hid_keyboard[usage->hid & HID_USAGE]); } else map_key(KEY_UNKNOWN); break; case HID_UP_BUTTON: code = ((usage->hid - 1) & HID_USAGE); switch (field->application) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: if (code <= 0xf) code += BTN_JOYSTICK; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_GD_GAMEPAD: if (code <= 0xf) code += BTN_GAMEPAD; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_CP_CONSUMER_CONTROL: if (hidinput_field_in_collection(device, field, HID_COLLECTION_NAMED_ARRAY, HID_CP_PROGRAMMABLEBUTTONS)) { if (code <= 0x1d) code += KEY_MACRO1; else code += BTN_TRIGGER_HAPPY - 0x1e; break; } fallthrough; default: switch (field->physical) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: code += BTN_JOYSTICK; break; case HID_GD_GAMEPAD: code += BTN_GAMEPAD; break; default: code += BTN_MISC; } } map_key(code); break; case HID_UP_SIMULATION: switch (usage->hid & 0xffff) { case 0xba: map_abs(ABS_RUDDER); break; case 0xbb: map_abs(ABS_THROTTLE); break; case 0xc4: map_abs(ABS_GAS); break; case 0xc5: map_abs(ABS_BRAKE); break; case 0xc8: map_abs(ABS_WHEEL); break; default: goto ignore; } break; case HID_UP_GENDESK: if ((usage->hid & 0xf0) == 0x80) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x1: map_key_clear(KEY_POWER); break; case 0x2: map_key_clear(KEY_SLEEP); break; case 0x3: map_key_clear(KEY_WAKEUP); break; case 0x4: map_key_clear(KEY_CONTEXT_MENU); break; case 0x5: map_key_clear(KEY_MENU); break; case 0x6: map_key_clear(KEY_PROG1); break; case 0x7: map_key_clear(KEY_HELP); break; case 0x8: map_key_clear(KEY_EXIT); break; case 0x9: map_key_clear(KEY_SELECT); break; case 0xa: map_key_clear(KEY_RIGHT); break; case 0xb: map_key_clear(KEY_LEFT); break; case 0xc: map_key_clear(KEY_UP); break; case 0xd: map_key_clear(KEY_DOWN); break; case 0xe: map_key_clear(KEY_POWER2); break; case 0xf: map_key_clear(KEY_RESTART); break; default: goto unknown; } break; } if ((usage->hid & 0xf0) == 0x90) { /* SystemControl & D-pad */ switch (usage->hid) { case HID_GD_UP: usage->hat_dir = 1; break; case HID_GD_DOWN: usage->hat_dir = 5; break; case HID_GD_RIGHT: usage->hat_dir = 3; break; case HID_GD_LEFT: usage->hat_dir = 7; break; case HID_GD_DO_NOT_DISTURB: map_key_clear(KEY_DO_NOT_DISTURB); break; default: goto unknown; } if (usage->hid <= HID_GD_LEFT) { if (field->dpad) { map_abs(field->dpad); goto ignore; } map_abs(ABS_HAT0X); } break; } if ((usage->hid & 0xf0) == 0xa0) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x9: map_key_clear(KEY_MICMUTE); break; case 0xa: map_key_clear(KEY_ACCESSIBILITY); break; default: goto ignore; } break; } if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ switch (usage->hid & 0xf) { case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; default: goto ignore; } break; } /* * Some lazy vendors declare 255 usages for System Control, * leading to the 
creation of ABS_X|Y axis and too many others. * It wouldn't be a problem if joydev doesn't consider the * device as a joystick then. */ if (field->application == HID_GD_SYSTEM_CONTROL) goto ignore; switch (usage->hid) { /* These usage IDs map directly to the usage codes. */ case HID_GD_X: case HID_GD_Y: case HID_GD_Z: case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs_clear(usage->hid & 0xf); break; case HID_GD_WHEEL: if (field->flags & HID_MAIN_ITEM_RELATIVE) { set_bit(REL_WHEEL, input->relbit); map_rel(REL_WHEEL_HI_RES); } else { map_abs(usage->hid & 0xf); } break; case HID_GD_SLIDER: case HID_GD_DIAL: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs(usage->hid & 0xf); break; case HID_GD_HATSWITCH: usage->hat_min = field->logical_minimum; usage->hat_max = field->logical_maximum; map_abs(ABS_HAT0X); break; case HID_GD_START: map_key_clear(BTN_START); break; case HID_GD_SELECT: map_key_clear(BTN_SELECT); break; case HID_GD_RFKILL_BTN: /* MS wireless radio ctl extension, also check CA */ if (field->application == HID_GD_WIRELESS_RADIO_CTLS) { map_key_clear(KEY_RFKILL); /* We need to simulate the btn release */ field->flags |= HID_MAIN_ITEM_RELATIVE; break; } goto unknown; default: goto unknown; } break; case HID_UP_LED: switch (usage->hid & 0xffff) { /* HID-Value: */ case 0x01: map_led (LED_NUML); break; /* "Num Lock" */ case 0x02: map_led (LED_CAPSL); break; /* "Caps Lock" */ case 0x03: map_led (LED_SCROLLL); break; /* "Scroll Lock" */ case 0x04: map_led (LED_COMPOSE); break; /* "Compose" */ case 0x05: map_led (LED_KANA); break; /* "Kana" */ case 0x27: map_led (LED_SLEEP); break; /* "Stand-By" */ case 0x4c: map_led (LED_SUSPEND); break; /* "System Suspend" */ case 0x09: map_led (LED_MUTE); break; /* "Mute" */ case 0x4b: map_led (LED_MISC); break; /* "Generic Indicator" */ case 0x19: map_led (LED_MAIL); break; /* "Message Waiting" */ case 0x4d: map_led (LED_CHARGING); break; /* "External Power Connected" */ default: goto ignore; } break; case HID_UP_DIGITIZER: if ((field->application & 0xff) == 0x01) /* Digitizer */ __set_bit(INPUT_PROP_POINTER, input->propbit); else if ((field->application & 0xff) == 0x02) /* Pen */ __set_bit(INPUT_PROP_DIRECT, input->propbit); switch (usage->hid & 0xff) { case 0x00: /* Undefined */ goto ignore; case 0x30: /* TipPressure */ if (!test_bit(BTN_TOUCH, input->keybit)) { device->quirks |= HID_QUIRK_NOTOUCH; set_bit(EV_KEY, input->evbit); set_bit(BTN_TOUCH, input->keybit); } map_abs_clear(ABS_PRESSURE); break; case 0x32: /* InRange */ switch (field->physical) { case HID_DG_PUCK: map_key(BTN_TOOL_MOUSE); break; case HID_DG_FINGER: map_key(BTN_TOOL_FINGER); break; default: /* * If the physical is not given, * rely on the application. 
*/ if (!field->physical) { switch (field->application) { case HID_DG_TOUCHSCREEN: case HID_DG_TOUCHPAD: map_key_clear(BTN_TOOL_FINGER); break; default: map_key_clear(BTN_TOOL_PEN); } } else { map_key(BTN_TOOL_PEN); } break; } break; case 0x3b: /* Battery Strength */ hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; case 0x3c: /* Invert */ device->quirks &= ~HID_QUIRK_NOINVERT; map_key_clear(BTN_TOOL_RUBBER); break; case 0x3d: /* X Tilt */ map_abs_clear(ABS_TILT_X); break; case 0x3e: /* Y Tilt */ map_abs_clear(ABS_TILT_Y); break; case 0x33: /* Touch */ case 0x42: /* TipSwitch */ case 0x43: /* TipSwitch2 */ device->quirks &= ~HID_QUIRK_NOTOUCH; map_key_clear(BTN_TOUCH); break; case 0x44: /* BarrelSwitch */ map_key_clear(BTN_STYLUS); break; case 0x45: /* ERASER */ /* * This event is reported when eraser tip touches the surface. * Actual eraser (BTN_TOOL_RUBBER) is set and released either * by Invert if tool reports proximity or by Eraser directly. */ if (!test_bit(BTN_TOOL_RUBBER, input->keybit)) { device->quirks |= HID_QUIRK_NOINVERT; set_bit(BTN_TOOL_RUBBER, input->keybit); } map_key_clear(BTN_TOUCH); break; case 0x46: /* TabletPick */ case 0x5a: /* SecondaryBarrelSwitch */ map_key_clear(BTN_STYLUS2); break; case 0x5b: /* TransducerSerialNumber */ case 0x6e: /* TransducerSerialNumber2 */ map_msc(MSC_SERIAL); break; default: goto unknown; } break; case HID_UP_TELEPHONY: switch (usage->hid & HID_USAGE) { case 0x2f: map_key_clear(KEY_MICMUTE); break; case 0xb0: map_key_clear(KEY_NUMERIC_0); break; case 0xb1: map_key_clear(KEY_NUMERIC_1); break; case 0xb2: map_key_clear(KEY_NUMERIC_2); break; case 0xb3: map_key_clear(KEY_NUMERIC_3); break; case 0xb4: map_key_clear(KEY_NUMERIC_4); break; case 0xb5: map_key_clear(KEY_NUMERIC_5); break; case 0xb6: map_key_clear(KEY_NUMERIC_6); break; case 0xb7: map_key_clear(KEY_NUMERIC_7); break; case 0xb8: map_key_clear(KEY_NUMERIC_8); break; case 0xb9: map_key_clear(KEY_NUMERIC_9); break; case 0xba: map_key_clear(KEY_NUMERIC_STAR); break; case 0xbb: map_key_clear(KEY_NUMERIC_POUND); break; case 0xbc: map_key_clear(KEY_NUMERIC_A); break; case 0xbd: map_key_clear(KEY_NUMERIC_B); break; case 0xbe: map_key_clear(KEY_NUMERIC_C); break; case 0xbf: map_key_clear(KEY_NUMERIC_D); break; default: goto ignore; } break; case HID_UP_CONSUMER: /* USB HUT v1.12, pages 75-84 */ switch (usage->hid & HID_USAGE) { case 0x000: goto ignore; case 0x030: map_key_clear(KEY_POWER); break; case 0x031: map_key_clear(KEY_RESTART); break; case 0x032: map_key_clear(KEY_SLEEP); break; case 0x034: map_key_clear(KEY_SLEEP); break; case 0x035: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x036: map_key_clear(BTN_MISC); break; case 0x040: map_key_clear(KEY_MENU); break; /* Menu */ case 0x041: map_key_clear(KEY_SELECT); break; /* Menu Pick */ case 0x042: map_key_clear(KEY_UP); break; /* Menu Up */ case 0x043: map_key_clear(KEY_DOWN); break; /* Menu Down */ case 0x044: map_key_clear(KEY_LEFT); break; /* Menu Left */ case 0x045: map_key_clear(KEY_RIGHT); break; /* Menu Right */ case 0x046: map_key_clear(KEY_ESC); break; /* Menu Escape */ case 0x047: map_key_clear(KEY_KPPLUS); break; /* Menu Value Increase */ case 0x048: map_key_clear(KEY_KPMINUS); break; /* Menu Value Decrease */ case 0x060: map_key_clear(KEY_INFO); break; /* Data On Screen */ case 0x061: map_key_clear(KEY_SUBTITLE); break; /* Closed Caption */ case 0x063: map_key_clear(KEY_VCR); break; /* VCR/TV */ case 0x065: map_key_clear(KEY_CAMERA); break; /* Snapshot */ case 0x069: 
map_key_clear(KEY_RED); break; case 0x06a: map_key_clear(KEY_GREEN); break; case 0x06b: map_key_clear(KEY_BLUE); break; case 0x06c: map_key_clear(KEY_YELLOW); break; case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break; case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x072: map_key_clear(KEY_BRIGHTNESS_TOGGLE); break; case 0x073: map_key_clear(KEY_BRIGHTNESS_MIN); break; case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; case 0x076: map_key_clear(KEY_CAMERA_ACCESS_ENABLE); break; case 0x077: map_key_clear(KEY_CAMERA_ACCESS_DISABLE); break; case 0x078: map_key_clear(KEY_CAMERA_ACCESS_TOGGLE); break; case 0x079: map_key_clear(KEY_KBDILLUMUP); break; case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; case 0x088: map_key_clear(KEY_PC); break; case 0x089: map_key_clear(KEY_TV); break; case 0x08a: map_key_clear(KEY_WWW); break; case 0x08b: map_key_clear(KEY_DVD); break; case 0x08c: map_key_clear(KEY_PHONE); break; case 0x08d: map_key_clear(KEY_PROGRAM); break; case 0x08e: map_key_clear(KEY_VIDEOPHONE); break; case 0x08f: map_key_clear(KEY_GAMES); break; case 0x090: map_key_clear(KEY_MEMO); break; case 0x091: map_key_clear(KEY_CD); break; case 0x092: map_key_clear(KEY_VCR); break; case 0x093: map_key_clear(KEY_TUNER); break; case 0x094: map_key_clear(KEY_EXIT); break; case 0x095: map_key_clear(KEY_HELP); break; case 0x096: map_key_clear(KEY_TAPE); break; case 0x097: map_key_clear(KEY_TV2); break; case 0x098: map_key_clear(KEY_SAT); break; case 0x09a: map_key_clear(KEY_PVR); break; case 0x09c: map_key_clear(KEY_CHANNELUP); break; case 0x09d: map_key_clear(KEY_CHANNELDOWN); break; case 0x0a0: map_key_clear(KEY_VCR2); break; case 0x0b0: map_key_clear(KEY_PLAY); break; case 0x0b1: map_key_clear(KEY_PAUSE); break; case 0x0b2: map_key_clear(KEY_RECORD); break; case 0x0b3: map_key_clear(KEY_FASTFORWARD); break; case 0x0b4: map_key_clear(KEY_REWIND); break; case 0x0b5: map_key_clear(KEY_NEXTSONG); break; case 0x0b6: map_key_clear(KEY_PREVIOUSSONG); break; case 0x0b7: map_key_clear(KEY_STOPCD); break; case 0x0b8: map_key_clear(KEY_EJECTCD); break; case 0x0bc: map_key_clear(KEY_MEDIA_REPEAT); break; case 0x0b9: map_key_clear(KEY_SHUFFLE); break; case 0x0bf: map_key_clear(KEY_SLOW); break; case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; case 0x0d8: map_key_clear(KEY_DICTATE); break; case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; case 0x0e2: map_key_clear(KEY_MUTE); break; case 0x0e5: map_key_clear(KEY_BASSBOOST); break; case 0x0e9: map_key_clear(KEY_VOLUMEUP); break; case 0x0ea: map_key_clear(KEY_VOLUMEDOWN); break; case 0x0f5: map_key_clear(KEY_SLOW); break; case 0x181: map_key_clear(KEY_BUTTONCONFIG); break; case 0x182: map_key_clear(KEY_BOOKMARKS); break; case 0x183: map_key_clear(KEY_CONFIG); break; case 0x184: map_key_clear(KEY_WORDPROCESSOR); break; case 0x185: map_key_clear(KEY_EDITOR); break; case 0x186: map_key_clear(KEY_SPREADSHEET); break; case 0x187: map_key_clear(KEY_GRAPHICSEDITOR); break; case 0x188: map_key_clear(KEY_PRESENTATION); break; case 0x189: map_key_clear(KEY_DATABASE); break; case 0x18a: map_key_clear(KEY_MAIL); break; case 0x18b: map_key_clear(KEY_NEWS); break; case 
0x18c: map_key_clear(KEY_VOICEMAIL); break; case 0x18d: map_key_clear(KEY_ADDRESSBOOK); break; case 0x18e: map_key_clear(KEY_CALENDAR); break; case 0x18f: map_key_clear(KEY_TASKMANAGER); break; case 0x190: map_key_clear(KEY_JOURNAL); break; case 0x191: map_key_clear(KEY_FINANCE); break; case 0x192: map_key_clear(KEY_CALC); break; case 0x193: map_key_clear(KEY_PLAYER); break; case 0x194: map_key_clear(KEY_FILE); break; case 0x196: map_key_clear(KEY_WWW); break; case 0x199: map_key_clear(KEY_CHAT); break; case 0x19c: map_key_clear(KEY_LOGOFF); break; case 0x19e: map_key_clear(KEY_COFFEE); break; case 0x19f: map_key_clear(KEY_CONTROLPANEL); break; case 0x1a2: map_key_clear(KEY_APPSELECT); break; case 0x1a3: map_key_clear(KEY_NEXT); break; case 0x1a4: map_key_clear(KEY_PREVIOUS); break; case 0x1a6: map_key_clear(KEY_HELP); break; case 0x1a7: map_key_clear(KEY_DOCUMENTS); break; case 0x1ab: map_key_clear(KEY_SPELLCHECK); break; case 0x1ae: map_key_clear(KEY_KEYBOARD); break; case 0x1b1: map_key_clear(KEY_SCREENSAVER); break; case 0x1b4: map_key_clear(KEY_FILE); break; case 0x1b6: map_key_clear(KEY_IMAGES); break; case 0x1b7: map_key_clear(KEY_AUDIO); break; case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; case 0x204: map_key_clear(KEY_EXIT); break; case 0x207: map_key_clear(KEY_SAVE); break; case 0x208: map_key_clear(KEY_PRINT); break; case 0x209: map_key_clear(KEY_PROPS); break; case 0x21a: map_key_clear(KEY_UNDO); break; case 0x21b: map_key_clear(KEY_COPY); break; case 0x21c: map_key_clear(KEY_CUT); break; case 0x21d: map_key_clear(KEY_PASTE); break; case 0x21f: map_key_clear(KEY_FIND); break; case 0x221: map_key_clear(KEY_SEARCH); break; case 0x222: map_key_clear(KEY_GOTO); break; case 0x223: map_key_clear(KEY_HOMEPAGE); break; case 0x224: map_key_clear(KEY_BACK); break; case 0x225: map_key_clear(KEY_FORWARD); break; case 0x226: map_key_clear(KEY_STOP); break; case 0x227: map_key_clear(KEY_REFRESH); break; case 0x22a: map_key_clear(KEY_BOOKMARKS); break; case 0x22d: map_key_clear(KEY_ZOOMIN); break; case 0x22e: map_key_clear(KEY_ZOOMOUT); break; case 0x22f: map_key_clear(KEY_ZOOMRESET); break; case 0x232: map_key_clear(KEY_FULL_SCREEN); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; case 0x238: /* AC Pan */ set_bit(REL_HWHEEL, input->relbit); map_rel(REL_HWHEEL_HI_RES); break; case 0x23d: map_key_clear(KEY_EDIT); break; case 0x25f: map_key_clear(KEY_CANCEL); break; case 0x269: map_key_clear(KEY_INSERT); break; case 0x26a: map_key_clear(KEY_DELETE); break; case 0x279: map_key_clear(KEY_REDO); break; case 0x289: map_key_clear(KEY_REPLY); break; case 0x28b: map_key_clear(KEY_FORWARDMAIL); break; case 0x28c: map_key_clear(KEY_SEND); break; case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break; case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break; case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break; case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break; case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break; case 0x2ca: map_key_clear(KEY_KBDINPUTASSIST_NEXTGROUP); break; case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; case 0x29f: map_key_clear(KEY_SCALE); break; default: 
map_key_clear(KEY_UNKNOWN); } break; case HID_UP_GENDEVCTRLS: switch (usage->hid) { case HID_DC_BATTERYSTRENGTH: hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; } goto unknown; case HID_UP_BATTERY: switch (usage->hid) { case HID_BAT_ABSOLUTESTATEOFCHARGE: hidinput_setup_battery(device, HID_INPUT_REPORT, field, true); usage->type = EV_PWR; return; case HID_BAT_CHARGING: usage->type = EV_PWR; return; } goto unknown; case HID_UP_CAMERA: switch (usage->hid & HID_USAGE) { case 0x020: map_key_clear(KEY_CAMERA_FOCUS); break; case 0x021: map_key_clear(KEY_CAMERA); break; default: goto ignore; } break; case HID_UP_HPVENDOR: /* Reported on a Dutch layout HP5308 */ set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x021: map_key_clear(KEY_PRINT); break; case 0x070: map_key_clear(KEY_HP); break; case 0x071: map_key_clear(KEY_CAMERA); break; case 0x072: map_key_clear(KEY_SOUND); break; case 0x073: map_key_clear(KEY_QUESTION); break; case 0x080: map_key_clear(KEY_EMAIL); break; case 0x081: map_key_clear(KEY_CHAT); break; case 0x082: map_key_clear(KEY_SEARCH); break; case 0x083: map_key_clear(KEY_CONNECT); break; case 0x084: map_key_clear(KEY_FINANCE); break; case 0x085: map_key_clear(KEY_SPORT); break; case 0x086: map_key_clear(KEY_SHOP); break; default: goto ignore; } break; case HID_UP_HPVENDOR2: set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x001: map_key_clear(KEY_MICMUTE); break; case 0x003: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x004: map_key_clear(KEY_BRIGHTNESSUP); break; default: goto ignore; } break; case HID_UP_MSVENDOR: goto ignore; case HID_UP_CUSTOM: /* Reported on Logitech and Apple USB keyboards */ set_bit(EV_REP, input->evbit); goto ignore; case HID_UP_LOGIVENDOR: /* intentional fallback */ case HID_UP_LOGIVENDOR2: /* intentional fallback */ case HID_UP_LOGIVENDOR3: goto ignore; case HID_UP_PID: switch (usage->hid & HID_USAGE) { case 0xa4: map_key_clear(BTN_DEAD); break; default: goto ignore; } break; default: unknown: if (field->report_size == 1) { if (field->report->type == HID_OUTPUT_REPORT) { map_led(LED_MISC); break; } map_key(BTN_MISC); break; } if (field->flags & HID_MAIN_ITEM_RELATIVE) { map_rel(REL_MISC); break; } map_abs(ABS_MISC); break; } mapped: /* Mapping failed, bail out */ if (!bit) return; if (device->driver->input_mapped && device->driver->input_mapped(device, hidinput, field, usage, &bit, &max) < 0) { /* * The driver indicated that no further generic handling * of the usage is desired. */ return; } set_bit(usage->type, input->evbit); /* * This part is *really* controversial: * - HID aims at being generic so we should do our best to export * all incoming events * - HID describes what events are, so there is no reason for ABS_X * to be mapped to ABS_Y * - HID is using *_MISC+N as a default value, but nothing prevents * *_MISC+N to overwrite a legitimate even, which confuses userspace * (for instance ABS_MISC + 7 is ABS_MT_SLOT, which has a different * processing) * * If devices still want to use this (at their own risk), they will * have to use the quirk HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, but * the default should be a reliable mapping. 
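 *
 * Worked example (illustrative, not taken from a real descriptor): two
 * vendor-defined usages in one report both fall through to ABS_MISC.
 * Without the quirk, the second one finds its bit already set, so
 * HID_STAT_DUP_DETECTED is flagged and the usage is dropped. With
 * HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, find_next_zero_bit() shifts it
 * to ABS_MISC + 1, and further duplicates keep walking upward, which is
 * how they can eventually collide with ABS_MT_SLOT (ABS_MISC + 7) as
 * described above.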
*/ while (usage->code <= max && test_and_set_bit(usage->code, bit)) { if (device->quirks & HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE) { usage->code = find_next_zero_bit(bit, max + 1, usage->code); } else { device->status |= HID_STAT_DUP_DETECTED; goto ignore; } } if (usage->code > max) goto ignore; if (usage->type == EV_ABS) { int a = field->logical_minimum; int b = field->logical_maximum; if ((device->quirks & HID_QUIRK_BADPAD) && (usage->code == ABS_X || usage->code == ABS_Y)) { a = field->logical_minimum = 0; b = field->logical_maximum = 255; } if (field->application == HID_GD_GAMEPAD || field->application == HID_GD_JOYSTICK) input_set_abs_params(input, usage->code, a, b, (b - a) >> 8, (b - a) >> 4); else input_set_abs_params(input, usage->code, a, b, 0, 0); input_abs_set_res(input, usage->code, hidinput_calc_abs_res(field, usage->code)); /* use a larger default input buffer for MT devices */ if (usage->code == ABS_MT_POSITION_X && input->hint_events_per_packet == 0) input_set_events_per_packet(input, 60); } if (usage->type == EV_ABS && (usage->hat_min < usage->hat_max || usage->hat_dir)) { int i; for (i = usage->code; i < usage->code + 2 && i <= max; i++) { input_set_abs_params(input, i, -1, 1, 0, 0); set_bit(i, input->absbit); } if (usage->hat_dir && !field->dpad) field->dpad = usage->code; } /* for those devices which produce Consumer volume usage as relative, * we emulate pressing volumeup/volumedown appropriate number of times * in hidinput_hid_event() */ if ((usage->type == EV_ABS) && (field->flags & HID_MAIN_ITEM_RELATIVE) && (usage->code == ABS_VOLUME)) { set_bit(KEY_VOLUMEUP, input->keybit); set_bit(KEY_VOLUMEDOWN, input->keybit); } if (usage->type == EV_KEY) { set_bit(EV_MSC, input->evbit); set_bit(MSC_SCAN, input->mscbit); } return; ignore: usage->type = 0; usage->code = 0; } static void hidinput_handle_scroll(struct hid_usage *usage, struct input_dev *input, __s32 value) { int code; int hi_res, lo_res; if (value == 0) return; if (usage->code == REL_WHEEL_HI_RES) code = REL_WHEEL; else code = REL_HWHEEL; /* * Windows reports one wheel click as value 120. Where a high-res * scroll wheel is present, a fraction of 120 is reported instead. * Our REL_WHEEL_HI_RES axis does the same because all HW must * adhere to the 120 expectation. 
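 *
 * Numeric sketch (assumed values, for illustration only): with a
 * resolution multiplier of 8, one hardware fraction reported as value 1
 * becomes hi_res = 1 * 120 / 8 = 15. After eight such events
 * wheel_accumulated reaches 120, so exactly one legacy REL_WHEEL click
 * is emitted alongside the running REL_WHEEL_HI_RES stream.
 */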
*/ hi_res = value * 120/usage->resolution_multiplier; usage->wheel_accumulated += hi_res; lo_res = usage->wheel_accumulated/120; if (lo_res) usage->wheel_accumulated -= lo_res * 120; input_event(input, EV_REL, code, lo_res); input_event(input, EV_REL, usage->code, hi_res); } static void hid_report_release_tool(struct hid_report *report, struct input_dev *input, unsigned int tool) { /* if the given tool is not currently reported, ignore */ if (!test_bit(tool, input->key)) return; /* * if the given tool was previously set, release it, * release any TOUCH and send an EV_SYN */ input_event(input, EV_KEY, BTN_TOUCH, 0); input_event(input, EV_KEY, tool, 0); input_event(input, EV_SYN, SYN_REPORT, 0); report->tool = 0; } static void hid_report_set_tool(struct hid_report *report, struct input_dev *input, unsigned int new_tool) { if (report->tool != new_tool) hid_report_release_tool(report, input, report->tool); input_event(input, EV_KEY, new_tool, 1); report->tool = new_tool; } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct input_dev *input; struct hid_report *report = field->report; unsigned *quirks = &hid->quirks; if (!usage->type) return; if (usage->type == EV_PWR) { bool handled = hidinput_set_battery_charge_status(hid, usage->hid, value); if (!handled) hidinput_update_battery(hid, value); return; } if (!field->hidinput) return; input = field->hidinput->input; if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) hat_dir = (value - usage->hat_min) * 8 / (usage->hat_max - usage->hat_min + 1) + 1; if (hat_dir < 0 || hat_dir > 8) hat_dir = 0; input_event(input, usage->type, usage->code , hid_hat_to_axis[hat_dir].x); input_event(input, usage->type, usage->code + 1, hid_hat_to_axis[hat_dir].y); return; } /* * Ignore out-of-range values as per HID specification, * section 5.10 and 6.2.25, when NULL state bit is present. * When it's not, clamp the value to match Microsoft's input * driver as mentioned in "Required HID usages for digitizers": * https://msdn.microsoft.com/en-us/library/windows/hardware/dn672278(v=vs.85).asp * * The logical_minimum < logical_maximum check is done so that we * don't unintentionally discard values sent by devices which * don't specify logical min and max. */ if ((field->flags & HID_MAIN_ITEM_VARIABLE) && field->logical_minimum < field->logical_maximum) { if (field->flags & HID_MAIN_ITEM_NULL_STATE && (value < field->logical_minimum || value > field->logical_maximum)) { dbg_hid("Ignoring out-of-range value %x\n", value); return; } value = clamp(value, field->logical_minimum, field->logical_maximum); } switch (usage->hid) { case HID_DG_ERASER: report->tool_active |= !!value; /* * if eraser is set, we must enforce BTN_TOOL_RUBBER * to accommodate for devices not following the spec. */ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (report->tool != BTN_TOOL_RUBBER) /* value is off, tool is not rubber, ignore */ return; else if (*quirks & HID_QUIRK_NOINVERT && !test_bit(BTN_TOUCH, input->key)) { /* * There is no invert to release the tool, let hid_input * send BTN_TOUCH with scancode and release the tool after. */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); return; } /* let hid-input set BTN_TOUCH */ break; case HID_DG_INVERT: report->tool_active |= !!value; /* * If invert is set, we store BTN_TOOL_RUBBER. 
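 *
 * Event-sequence sketch (hypothetical pen, for illustration only):
 * flipping the pen over sends Invert=1, which selects BTN_TOOL_RUBBER;
 * touching the surface then raises Eraser/TipSwitch, reported as
 * BTN_TOUCH; once Invert reads 0 again and none of Eraser, InRange or
 * TipSwitch has been seen active for this report, tool_active stays
 * false and BTN_TOOL_RUBBER is released below.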
*/ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (!report->tool_active) /* tool_active not set means Invert and Eraser are not set */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); /* no further processing */ return; case HID_DG_INRANGE: report->tool_active |= !!value; if (report->tool_active) { /* * if tool is not set but is marked as active, * assume ours */ if (!report->tool) report->tool = usage->code; /* drivers may have changed the value behind our back, resend it */ hid_report_set_tool(report, input, report->tool); } else { hid_report_release_tool(report, input, usage->code); } /* reset tool_active for the next event */ report->tool_active = false; /* no further processing */ return; case HID_DG_TIPSWITCH: report->tool_active |= !!value; /* if tool is set to RUBBER we should ignore the current value */ if (report->tool == BTN_TOOL_RUBBER) return; break; case HID_DG_TIPPRESSURE: if (*quirks & HID_QUIRK_NOTOUCH) { int a = field->logical_minimum; int b = field->logical_maximum; if (value > a + ((b - a) >> 3)) { input_event(input, EV_KEY, BTN_TOUCH, 1); report->tool_active = true; } } break; case HID_UP_PID | 0x83UL: /* Simultaneous Effects Max */ dbg_hid("Maximum Effects - %d\n",value); return; case HID_UP_PID | 0x7fUL: dbg_hid("PID Pool Report\n"); return; } switch (usage->type) { case EV_KEY: if (usage->code == 0) /* Key 0 is "unassigned", not KEY_UNKNOWN */ return; break; case EV_REL: if (usage->code == REL_WHEEL_HI_RES || usage->code == REL_HWHEEL_HI_RES) { hidinput_handle_scroll(usage, input, value); return; } break; case EV_ABS: if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->code == ABS_VOLUME) { int count = abs(value); int direction = value > 0 ? KEY_VOLUMEUP : KEY_VOLUMEDOWN; int i; for (i = 0; i < count; i++) { input_event(input, EV_KEY, direction, 1); input_sync(input); input_event(input, EV_KEY, direction, 0); input_sync(input); } return; } else if (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) || ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y)) value = field->logical_maximum - value; break; } /* * Ignore reports for absolute data if the data didn't change. This is * not only an optimization but also fixes 'dead' key reports. Some * RollOver implementations for localized keys (like BACKSLASH/PIPE; HID * 0x31 and 0x32) report multiple keys, even though a localized keyboard * can only have one of them physically available. The 'dead' keys * report constant 0. As all map to the same keycode, they'd confuse * the input layer. If we filter the 'dead' keys on the HID level, we * skip the keycode translation and only forward real events. 
*/ if (!(field->flags & (HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_BUFFERED_BYTE)) && (field->flags & HID_MAIN_ITEM_VARIABLE) && usage->usage_index < field->maxusage && value == field->value[usage->usage_index]) return; /* report the usage code as scancode if the key status has changed */ if (usage->type == EV_KEY && (!test_bit(usage->code, input->key)) == value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); input_event(input, usage->type, usage->code, value); if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->type == EV_KEY && value) { input_sync(input); input_event(input, usage->type, usage->code, 0); } } void hidinput_report_event(struct hid_device *hid, struct hid_report *report) { struct hid_input *hidinput; if (hid->quirks & HID_QUIRK_NO_INPUT_SYNC) return; list_for_each_entry(hidinput, &hid->inputs, list) input_sync(hidinput->input); } EXPORT_SYMBOL_GPL(hidinput_report_event); static int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field) { struct hid_report *report; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { *field = report->field[i]; for (j = 0; j < (*field)->maxusage; j++) if ((*field)->usage[j].type == type && (*field)->usage[j].code == code) return j; } } return -1; } struct hid_field *hidinput_get_led_field(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED) return field; } } return NULL; } EXPORT_SYMBOL_GPL(hidinput_get_led_field); unsigned int hidinput_count_leds(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; unsigned int count = 0; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED && field->value[j]) count += 1; } } return count; } EXPORT_SYMBOL_GPL(hidinput_count_leds); static void hidinput_led_worker(struct work_struct *work) { struct hid_device *hid = container_of(work, struct hid_device, led_work); struct hid_field *field; struct hid_report *report; int ret; u32 len; __u8 *buf; field = hidinput_get_led_field(hid); if (!field) return; /* * field->report is accessed unlocked regarding HID core. So there might * be another incoming SET-LED request from user-space, which changes * the LED state while we assemble our outgoing buffer. However, this * doesn't matter as hid_output_report() correctly converts it into a * boolean value no matter what information is currently set on the LED * field (even garbage). So the remote device will always get a valid * request. * And in case we send a wrong value, a next led worker is spawned * for every SET-LED request so the following worker will send the * correct value, guaranteed! 
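 *
 * For context (illustrative, not part of the argument above): a SET-LED
 * request typically starts with user space writing an EV_LED event to
 * the evdev node; hidinput_input_event() further down stores the value
 * with hid_set_field() and schedules this worker, so several writers can
 * indeed race with one in-flight output report, which is the situation
 * this comment argues is harmless.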
*/ report = field->report; /* use custom SET_REPORT request if possible (asynchronous) */ if (hid->ll_driver->request) return hid->ll_driver->request(hid, report, HID_REQ_SET_REPORT); /* fall back to generic raw-output-report */ len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return; hid_output_report(report, buf); /* synchronous output report */ ret = hid_hw_output_report(hid, buf, len); if (ret == -ENOSYS) hid_hw_raw_request(hid, report->id, buf, len, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(buf); } static int hidinput_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct hid_device *hid = input_get_drvdata(dev); struct hid_field *field; int offset; if (type == EV_FF) return input_ff_event(dev, type, code, value); if (type != EV_LED) return -1; if ((offset = hidinput_find_field(hid, type, code, &field)) == -1) { hid_warn(dev, "event field not found\n"); return -1; } hid_set_field(field, offset, value); schedule_work(&hid->led_work); return 0; } static int hidinput_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); return hid_hw_open(hid); } static void hidinput_close(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); hid_hw_close(hid); } static bool __hidinput_change_resolution_multipliers(struct hid_device *hid, struct hid_report *report, bool use_logical_max) { struct hid_usage *usage; bool update_needed = false; bool get_report_completed = false; int i, j; if (report->maxfield == 0) return false; for (i = 0; i < report->maxfield; i++) { __s32 value = use_logical_max ? report->field[i]->logical_maximum : report->field[i]->logical_minimum; /* There is no good reason for a Resolution * Multiplier to have a count other than 1. * Ignore that case. */ if (report->field[i]->report_count != 1) continue; for (j = 0; j < report->field[i]->maxusage; j++) { usage = &report->field[i]->usage[j]; if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) continue; /* * If we have more than one feature within this * report we need to fill in the bits from the * others before we can overwrite the ones for the * Resolution Multiplier. * * But if we're not allowed to read from the device, * we just bail. Such a device should not exist * anyway. */ if (!get_report_completed && report->maxfield > 1) { if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS) return update_needed; hid_hw_request(hid, report, HID_REQ_GET_REPORT); hid_hw_wait(hid); get_report_completed = true; } report->field[i]->value[j] = value; update_needed = true; } } return update_needed; } static void hidinput_change_resolution_multipliers(struct hid_device *hid) { struct hid_report_enum *rep_enum; struct hid_report *rep; int ret; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { bool update_needed = __hidinput_change_resolution_multipliers(hid, rep, true); if (update_needed) { ret = __hid_request(hid, rep, HID_REQ_SET_REPORT); if (ret) { __hidinput_change_resolution_multipliers(hid, rep, false); return; } } } /* refresh our structs */ hid_setup_resolution_multiplier(hid); } static void report_features(struct hid_device *hid) { struct hid_driver *drv = hid->driver; struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; /* Verify if Battery Strength feature is available */ if (usage->hid == HID_DC_BATTERYSTRENGTH) hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i], false); if (drv->feature_mapping) drv->feature_mapping(hid, rep->field[i], usage); } } } static struct hid_input *hidinput_allocate(struct hid_device *hid, unsigned int application) { struct hid_input *hidinput = kzalloc(sizeof(*hidinput), GFP_KERNEL); struct input_dev *input_dev = input_allocate_device(); const char *suffix = NULL; size_t suffix_len, name_len; if (!hidinput || !input_dev) goto fail; if ((hid->quirks & HID_QUIRK_INPUT_PER_APP) && hid->maxapplication > 1) { switch (application) { case HID_GD_KEYBOARD: suffix = "Keyboard"; break; case HID_GD_KEYPAD: suffix = "Keypad"; break; case HID_GD_MOUSE: suffix = "Mouse"; break; case HID_DG_PEN: /* * yes, there is an issue here: * DG_PEN -> "Stylus" * DG_STYLUS -> "Pen" * But changing this now means users with config snippets * will have to change it and the test suite will not be happy. */ suffix = "Stylus"; break; case HID_DG_STYLUS: suffix = "Pen"; break; case HID_DG_TOUCHSCREEN: suffix = "Touchscreen"; break; case HID_DG_TOUCHPAD: suffix = "Touchpad"; break; case HID_GD_SYSTEM_CONTROL: suffix = "System Control"; break; case HID_CP_CONSUMER_CONTROL: suffix = "Consumer Control"; break; case HID_GD_WIRELESS_RADIO_CTLS: suffix = "Wireless Radio Control"; break; case HID_GD_SYSTEM_MULTIAXIS: suffix = "System Multi Axis"; break; default: break; } } if (suffix) { name_len = strlen(hid->name); suffix_len = strlen(suffix); if ((name_len < suffix_len) || strcmp(hid->name + name_len - suffix_len, suffix)) { hidinput->name = kasprintf(GFP_KERNEL, "%s %s", hid->name, suffix); if (!hidinput->name) goto fail; } } input_set_drvdata(input_dev, hid); input_dev->event = hidinput_input_event; input_dev->open = hidinput_open; input_dev->close = hidinput_close; input_dev->setkeycode = hidinput_setkeycode; input_dev->getkeycode = hidinput_getkeycode; input_dev->name = hidinput->name ? 
hidinput->name : hid->name; input_dev->phys = hid->phys; input_dev->uniq = hid->uniq; input_dev->id.bustype = hid->bus; input_dev->id.vendor = hid->vendor; input_dev->id.product = hid->product; input_dev->id.version = hid->version; input_dev->dev.parent = &hid->dev; hidinput->input = input_dev; hidinput->application = application; list_add_tail(&hidinput->list, &hid->inputs); INIT_LIST_HEAD(&hidinput->reports); return hidinput; fail: kfree(hidinput); input_free_device(input_dev); hid_err(hid, "Out of memory during hid input probe\n"); return NULL; } static bool hidinput_has_been_populated(struct hid_input *hidinput) { int i; unsigned long r = 0; for (i = 0; i < BITS_TO_LONGS(EV_CNT); i++) r |= hidinput->input->evbit[i]; for (i = 0; i < BITS_TO_LONGS(KEY_CNT); i++) r |= hidinput->input->keybit[i]; for (i = 0; i < BITS_TO_LONGS(REL_CNT); i++) r |= hidinput->input->relbit[i]; for (i = 0; i < BITS_TO_LONGS(ABS_CNT); i++) r |= hidinput->input->absbit[i]; for (i = 0; i < BITS_TO_LONGS(MSC_CNT); i++) r |= hidinput->input->mscbit[i]; for (i = 0; i < BITS_TO_LONGS(LED_CNT); i++) r |= hidinput->input->ledbit[i]; for (i = 0; i < BITS_TO_LONGS(SND_CNT); i++) r |= hidinput->input->sndbit[i]; for (i = 0; i < BITS_TO_LONGS(FF_CNT); i++) r |= hidinput->input->ffbit[i]; for (i = 0; i < BITS_TO_LONGS(SW_CNT); i++) r |= hidinput->input->swbit[i]; return !!r; } static void hidinput_cleanup_hidinput(struct hid_device *hid, struct hid_input *hidinput) { struct hid_report *report; int i, k; list_del(&hidinput->list); input_free_device(hidinput->input); kfree(hidinput->name); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) if (report->field[i]->hidinput == hidinput) report->field[i]->hidinput = NULL; } } kfree(hidinput); } static struct hid_input *hidinput_match(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->report && hidinput->report->id == report->id) return hidinput; } return NULL; } static struct hid_input *hidinput_match_application(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->application == report->application) return hidinput; /* * Keep SystemControl and ConsumerControl applications together * with the main keyboard, if present. 
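 *
 * Example (illustrative): a keyboard whose descriptor carries Keyboard,
 * Consumer Control and System Control application collections would,
 * with HID_QUIRK_INPUT_PER_APP, otherwise end up as three input nodes;
 * folding the two control applications into the existing Keyboard
 * hid_input keeps the multimedia and power hotkeys on the same device
 * node as the ordinary keys.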
*/ if ((report->application == HID_GD_SYSTEM_CONTROL || report->application == HID_CP_CONSUMER_CONTROL) && hidinput->application == HID_GD_KEYBOARD) { return hidinput; } } return NULL; } static inline void hidinput_configure_usages(struct hid_input *hidinput, struct hid_report *report) { int i, j, k; int first_field_index = 0; int slot_collection_index = -1; int prev_collection_index = -1; unsigned int slot_idx = 0; struct hid_field *field; /* * First tag all the fields that are part of a slot, * a slot needs to have one Contact ID in the collection */ for (i = 0; i < report->maxfield; i++) { field = report->field[i]; /* ignore fields without usage */ if (field->maxusage < 1) continue; /* * janitoring when collection_index changes */ if (prev_collection_index != field->usage->collection_index) { prev_collection_index = field->usage->collection_index; first_field_index = i; } /* * if we already found a Contact ID in the collection, * tag and continue to the next. */ if (slot_collection_index == field->usage->collection_index) { field->slot_idx = slot_idx; continue; } /* check if the current field has Contact ID */ for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == HID_DG_CONTACTID) { slot_collection_index = field->usage->collection_index; slot_idx++; /* * mark all previous fields and this one in the * current collection to be slotted. */ for (k = first_field_index; k <= i; k++) report->field[k]->slot_idx = slot_idx; break; } } } for (i = 0; i < report->maxfield; i++) for (j = 0; j < report->field[i]->maxusage; j++) hidinput_configure_usage(hidinput, report->field[i], report->field[i]->usage + j, j); } /* * Register the input device; print a message. * Configure the input layer interface * Read all reports and initialize the absolute field values. */ int hidinput_connect(struct hid_device *hid, unsigned int force) { struct hid_driver *drv = hid->driver; struct hid_report *report; struct hid_input *next, *hidinput = NULL; unsigned int application; int i, k; INIT_LIST_HEAD(&hid->inputs); INIT_WORK(&hid->led_work, hidinput_led_worker); hid->status &= ~HID_STAT_DUP_DETECTED; if (!force) { for (i = 0; i < hid->maxcollection; i++) { struct hid_collection *col = &hid->collection[i]; if (col->type == HID_COLLECTION_APPLICATION || col->type == HID_COLLECTION_PHYSICAL) if (IS_INPUT_APPLICATION(col->usage)) break; } if (i == hid->maxcollection) return -1; } report_features(hid); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { if (!report->maxfield) continue; application = report->application; /* * Find the previous hidinput report attached * to this report id. 
*/ if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput = hidinput_match(report); else if (hid->maxapplication > 1 && (hid->quirks & HID_QUIRK_INPUT_PER_APP)) hidinput = hidinput_match_application(report); if (!hidinput) { hidinput = hidinput_allocate(hid, application); if (!hidinput) goto out_unwind; } hidinput_configure_usages(hidinput, report); if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput->report = report; list_add_tail(&report->hidinput_list, &hidinput->reports); } } hidinput_change_resolution_multipliers(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { if (drv->input_configured && drv->input_configured(hid, hidinput)) goto out_unwind; if (!hidinput_has_been_populated(hidinput)) { /* no need to register an input device not populated */ hidinput_cleanup_hidinput(hid, hidinput); continue; } if (input_register_device(hidinput->input)) goto out_unwind; hidinput->registered = true; } if (list_empty(&hid->inputs)) { hid_err(hid, "No inputs registered, leaving\n"); goto out_unwind; } if (hid->status & HID_STAT_DUP_DETECTED) hid_dbg(hid, "Some usages could not be mapped, please use HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE if this is legitimate.\n"); return 0; out_unwind: /* unwind the ones we already registered */ hidinput_disconnect(hid); return -1; } EXPORT_SYMBOL_GPL(hidinput_connect); void hidinput_disconnect(struct hid_device *hid) { struct hid_input *hidinput, *next; hidinput_cleanup_battery(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { list_del(&hidinput->list); if (hidinput->registered) input_unregister_device(hidinput->input); else input_free_device(hidinput->input); kfree(hidinput->name); kfree(hidinput); } /* led_work is spawned by input_dev callbacks, but doesn't access the * parent input_dev at all. Once all input devices are removed, we * know that led_work will never get restarted, so we can cancel it * synchronously and are safe. */ cancel_work_sync(&hid->led_work); } EXPORT_SYMBOL_GPL(hidinput_disconnect); #ifdef CONFIG_HID_KUNIT_TEST #include "hid-input-test.c" #endif
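/*
 * Illustrative user-space sketch (not part of hid-input.c): it reads an
 * evdev node created by this driver and prints the EV_MSC/MSC_SCAN value
 * that hidinput_hid_event() emits ahead of each key state change, next
 * to the resulting EV_KEY code. The default device path is an
 * assumption; pass any /dev/input/eventN backed by a HID device.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/input.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/dev/input/event0";
	struct input_event ev;
	unsigned int last_scan = 0;	/* 0 until a scancode has been seen */
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
		if (ev.type == EV_MSC && ev.code == MSC_SCAN)
			last_scan = ev.value;	/* HID usage, e.g. 0x70004 for 'a' */
		else if (ev.type == EV_KEY)
			printf("scan %#x -> key %d value %d\n",
			       last_scan, ev.code, ev.value);
	}
	close(fd);
	return 0;
}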
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   (Tentative) USB Audio Driver for ALSA
 *
 *   Copyright (c) 2002 by Takashi Iwai <tiwai@suse.de>
 *
 *   Many codes borrowed from audio.c by
 *	    Alan Cox (alan@lxorguk.ukuu.org.uk)
 *	    Thomas Sailer (sailer@ife.ee.ethz.ch)
 *
 *   Audio Class 3.0 support by Ruslan Bilovol <ruslan.bilovol@gmail.com>
 *
 *  NOTES:
 *
 *   - the linked URBs would be preferred but not used so far because of
 *     the instability of unlinking.
 *   - type II is not supported properly. there is no device which supports
 *     this type *correctly*. SB extigy looks as if it supports, but it's
 *     indeed an AC3 stream packed in SPDIF frames (i.e. no real AC3 stream).
 */

#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/usb.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/usb/audio.h>
#include <linux/usb/audio-v2.h>
#include <linux/usb/audio-v3.h>
#include <linux/module.h>

#include <sound/control.h>
#include <sound/core.h>
#include <sound/info.h>
#include <sound/pcm.h>
#include <sound/pcm_params.h>
#include <sound/initval.h>

#include "usbaudio.h"
#include "card.h"
#include "midi.h"
#include "midi2.h"
#include "mixer.h"
#include "proc.h"
#include "quirks.h"
#include "endpoint.h"
#include "helper.h"
#include "pcm.h"
#include "format.h"
#include "power.h"
#include "stream.h"
#include "media.h"

MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
MODULE_DESCRIPTION("USB Audio");
MODULE_LICENSE("GPL");

static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;/* Enable this card */
/* Vendor/product IDs for this card */
static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 };
static int pid[SNDRV_CARDS] = { [0 ...
(SNDRV_CARDS-1)] = -1 }; static int device_setup[SNDRV_CARDS]; /* device parameter for this card */ static bool ignore_ctl_error; static bool autoclock = true; static bool lowlatency = true; static char *quirk_alias[SNDRV_CARDS]; static char *delayed_register[SNDRV_CARDS]; static bool implicit_fb[SNDRV_CARDS]; static unsigned int quirk_flags[SNDRV_CARDS]; bool snd_usb_use_vmalloc = true; bool snd_usb_skip_validation; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the USB audio adapter."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the USB audio adapter."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable USB audio adapter."); module_param_array(vid, int, NULL, 0444); MODULE_PARM_DESC(vid, "Vendor ID for the USB audio device."); module_param_array(pid, int, NULL, 0444); MODULE_PARM_DESC(pid, "Product ID for the USB audio device."); module_param_array(device_setup, int, NULL, 0444); MODULE_PARM_DESC(device_setup, "Specific device setup (if needed)."); module_param(ignore_ctl_error, bool, 0444); MODULE_PARM_DESC(ignore_ctl_error, "Ignore errors from USB controller for mixer interfaces."); module_param(autoclock, bool, 0444); MODULE_PARM_DESC(autoclock, "Enable auto-clock selection for UAC2 devices (default: yes)."); module_param(lowlatency, bool, 0444); MODULE_PARM_DESC(lowlatency, "Enable low latency playback (default: yes)."); module_param_array(quirk_alias, charp, NULL, 0444); MODULE_PARM_DESC(quirk_alias, "Quirk aliases, e.g. 0123abcd:5678beef."); module_param_array(delayed_register, charp, NULL, 0444); MODULE_PARM_DESC(delayed_register, "Quirk for delayed registration, given by id:iface, e.g. 0123abcd:4."); module_param_array(implicit_fb, bool, NULL, 0444); MODULE_PARM_DESC(implicit_fb, "Apply generic implicit feedback sync mode."); module_param_array(quirk_flags, uint, NULL, 0444); MODULE_PARM_DESC(quirk_flags, "Driver quirk bit flags."); module_param_named(use_vmalloc, snd_usb_use_vmalloc, bool, 0444); MODULE_PARM_DESC(use_vmalloc, "Use vmalloc for PCM intermediate buffers (default: yes)."); module_param_named(skip_validation, snd_usb_skip_validation, bool, 0444); MODULE_PARM_DESC(skip_validation, "Skip unit descriptor validation (default: no)."); /* * we keep the snd_usb_audio_t instances by ourselves for merging * the all interfaces on the same card as one sound device. */ static DEFINE_MUTEX(register_mutex); static struct snd_usb_audio *usb_chip[SNDRV_CARDS]; static struct usb_driver usb_audio_driver; /* * disconnect streams * called from usb_audio_disconnect() */ static void snd_usb_stream_disconnect(struct snd_usb_stream *as) { int idx; struct snd_usb_substream *subs; for (idx = 0; idx < 2; idx++) { subs = &as->substream[idx]; if (!subs->num_formats) continue; subs->data_endpoint = NULL; subs->sync_endpoint = NULL; } } static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int interface) { struct usb_device *dev = chip->dev; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; struct usb_interface *iface = usb_ifnum_to_if(dev, interface); if (!iface) { dev_err(&dev->dev, "%u:%d : does not exist\n", ctrlif, interface); return -EINVAL; } alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); /* * Android with both accessory and audio interfaces enabled gets the * interface numbers wrong. 
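 *
 * (For orientation, and assuming the usual USB_ID() packing of vendor in
 * the upper 16 bits and product in the lower 16: 0x18d1 is Google's
 * vendor ID and 0x2d04/0x2d05 are Android Open Accessory modes that
 * include audio, which is why only those two IDs get their streaming
 * interface remapped to number 2 below.)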
*/ if ((chip->usb_id == USB_ID(0x18d1, 0x2d04) || chip->usb_id == USB_ID(0x18d1, 0x2d05)) && interface == 0 && altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && altsd->bInterfaceSubClass == USB_SUBCLASS_VENDOR_SPEC) { interface = 2; iface = usb_ifnum_to_if(dev, interface); if (!iface) return -EINVAL; alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); } if (usb_interface_claimed(iface)) { dev_dbg(&dev->dev, "%d:%d: skipping, already claimed\n", ctrlif, interface); return -EINVAL; } if ((altsd->bInterfaceClass == USB_CLASS_AUDIO || altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) && altsd->bInterfaceSubClass == USB_SUBCLASS_MIDISTREAMING) { int err = snd_usb_midi_v2_create(chip, iface, NULL, chip->usb_id); if (err < 0) { dev_err(&dev->dev, "%u:%d: cannot create sequencer device\n", ctrlif, interface); return -EINVAL; } return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } if ((altsd->bInterfaceClass != USB_CLASS_AUDIO && altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING) { dev_dbg(&dev->dev, "%u:%d: skipping non-supported interface %d\n", ctrlif, interface, altsd->bInterfaceClass); /* skip non-supported classes */ return -EINVAL; } if (snd_usb_get_speed(dev) == USB_SPEED_LOW) { dev_err(&dev->dev, "low speed audio streaming not supported\n"); return -EINVAL; } snd_usb_add_ctrl_interface_link(chip, interface, ctrlif); if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } return 0; } /* * parse audio control descriptor and create pcm/midi streams */ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) { struct usb_device *dev = chip->dev; struct usb_host_interface *host_iface; struct usb_interface_descriptor *altsd; int i, protocol; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; altsd = get_iface_desc(host_iface); protocol = altsd->bInterfaceProtocol; switch (protocol) { default: dev_warn(&dev->dev, "unknown interface protocol %#02x, assuming v1\n", protocol); fallthrough; case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1; int rest_bytes; h1 = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen, NULL, UAC_HEADER); if (!h1 || h1->bLength < sizeof(*h1)) { dev_err(&dev->dev, "cannot find UAC_HEADER\n"); return -EINVAL; } rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - (void *)h1; /* just to be sure -- this shouldn't hit at all */ if (rest_bytes <= 0) { dev_err(&dev->dev, "invalid control header\n"); return -EINVAL; } if (rest_bytes < sizeof(*h1)) { dev_err(&dev->dev, "too short v1 buffer descriptor\n"); return -EINVAL; } if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } if (rest_bytes < h1->bLength) { dev_err(&dev->dev, "invalid buffer length (v1)\n"); return -EINVAL; } if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; } for (i = 0; i < h1->bInCollection; i++) snd_usb_create_stream(chip, ctrlif, h1->baInterfaceNr[i]); break; } case UAC_VERSION_2: case UAC_VERSION_3: { struct usb_interface_assoc_descriptor *assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; if (!assoc) { /* * Firmware writers cannot count to three. So to find * the IAD on the NuForce UDH-100, also check the next * interface. 
*/ struct usb_interface *iface = usb_ifnum_to_if(dev, ctrlif + 1); if (iface && iface->intf_assoc && iface->intf_assoc->bFunctionClass == USB_CLASS_AUDIO && iface->intf_assoc->bFunctionProtocol == UAC_VERSION_2) assoc = iface->intf_assoc; } if (!assoc) { dev_err(&dev->dev, "Audio class v2/v3 interfaces need an interface association\n"); return -EINVAL; } if (protocol == UAC_VERSION_3) { int badd = assoc->bFunctionSubClass; if (badd != UAC3_FUNCTION_SUBCLASS_FULL_ADC_3_0 && (badd < UAC3_FUNCTION_SUBCLASS_GENERIC_IO || badd > UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE)) { dev_err(&dev->dev, "Unsupported UAC3 BADD profile\n"); return -EINVAL; } chip->badd_profile = badd; } for (i = 0; i < assoc->bInterfaceCount; i++) { int intf = assoc->bFirstInterface + i; if (intf != ctrlif) snd_usb_create_stream(chip, ctrlif, intf); } break; } } return 0; } /* * Profile name preset table */ struct usb_audio_device_name { u32 id; const char *vendor_name; const char *product_name; const char *profile_name; /* override card->longname */ }; #define PROFILE_NAME(vid, pid, vendor, product, profile) \ { .id = USB_ID(vid, pid), .vendor_name = (vendor), \ .product_name = (product), .profile_name = (profile) } #define DEVICE_NAME(vid, pid, vendor, product) \ PROFILE_NAME(vid, pid, vendor, product, NULL) /* vendor/product and profile name presets, sorted in device id order */ static const struct usb_audio_device_name usb_audio_names[] = { /* HP Thunderbolt Dock Audio Headset */ PROFILE_NAME(0x03f0, 0x0269, "HP", "Thunderbolt Dock Audio Headset", "HP-Thunderbolt-Dock-Audio-Headset"), /* HP Thunderbolt Dock Audio Module */ PROFILE_NAME(0x03f0, 0x0567, "HP", "Thunderbolt Dock Audio Module", "HP-Thunderbolt-Dock-Audio-Module"), /* Two entries for Gigabyte TRX40 Aorus Master: * TRX40 Aorus Master has two USB-audio devices, one for the front * headphone with ESS SABRE9218 DAC chip, while another for the rest * I/O (the rear panel and the front mic) with Realtek ALC1220-VB. * Here we provide two distinct names for making UCM profiles easier. 
*/ PROFILE_NAME(0x0414, 0xa000, "Gigabyte", "Aorus Master Front Headphone", "Gigabyte-Aorus-Master-Front-Headphone"), PROFILE_NAME(0x0414, 0xa001, "Gigabyte", "Aorus Master Main Audio", "Gigabyte-Aorus-Master-Main-Audio"), /* Gigabyte TRX40 Aorus Pro WiFi */ PROFILE_NAME(0x0414, 0xa002, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Creative/E-Mu devices */ DEVICE_NAME(0x041e, 0x3010, "Creative Labs", "Sound Blaster MP3+"), /* Creative/Toshiba Multimedia Center SB-0500 */ DEVICE_NAME(0x041e, 0x3048, "Toshiba", "SB-0500"), /* Logitech Audio Devices */ DEVICE_NAME(0x046d, 0x0867, "Logitech, Inc.", "Logi-MeetUp"), DEVICE_NAME(0x046d, 0x0874, "Logitech, Inc.", "Logi-Tap-Audio"), DEVICE_NAME(0x046d, 0x087c, "Logitech, Inc.", "Logi-Huddle"), DEVICE_NAME(0x046d, 0x0898, "Logitech, Inc.", "Logi-RB-Audio"), DEVICE_NAME(0x046d, 0x08d2, "Logitech, Inc.", "Logi-RBM-Audio"), DEVICE_NAME(0x046d, 0x0990, "Logitech, Inc.", "QuickCam Pro 9000"), DEVICE_NAME(0x05e1, 0x0408, "Syntek", "STK1160"), DEVICE_NAME(0x05e1, 0x0480, "Hauppauge", "Woodbury"), /* ASUS ROG Zenith II: this machine has also two devices, one for * the front headphone and another for the rest */ PROFILE_NAME(0x0b05, 0x1915, "ASUS", "Zenith II Front Headphone", "Zenith-II-Front-Headphone"), PROFILE_NAME(0x0b05, 0x1916, "ASUS", "Zenith II Main Audio", "Zenith-II-Main-Audio"), /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* ASUS PRIME TRX40 PRO-S */ PROFILE_NAME(0x0b05, 0x1918, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), /* Dell WD19 Dock */ PROFILE_NAME(0x0bda, 0x402e, "Dell", "WD19 Dock", "Dell-WD15-Dock"), DEVICE_NAME(0x0ccd, 0x0028, "TerraTec", "Aureon5.1MkII"), /* * The original product_name is "USB Sound Device", however this name * is also used by the CM106 based cards, so make it unique. */ DEVICE_NAME(0x0d8c, 0x0102, NULL, "ICUSBAUDIO7D"), DEVICE_NAME(0x0d8c, 0x0103, NULL, "Audio Advantage MicroII"), /* MSI TRX40 Creator */ PROFILE_NAME(0x0db0, 0x0d64, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* MSI TRX40 */ PROFILE_NAME(0x0db0, 0x543d, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), /* Dock/Stand for HP Engage Go */ PROFILE_NAME(0x103c, 0x830a, "HP", "HP Engage Go Dock", "HP-Engage-Go-Dock"), /* Stanton/N2IT Final Scratch v1 device ('Scratchamp') */ DEVICE_NAME(0x103d, 0x0100, "Stanton", "ScratchAmp"), DEVICE_NAME(0x103d, 0x0101, "Stanton", "ScratchAmp"), /* aka. 
Serato Scratch Live DJ Box */ DEVICE_NAME(0x13e5, 0x0001, "Rane", "SL-1"), /* Lenovo ThinkStation P620 Rear Line-in, Line-out and Microphone */ PROFILE_NAME(0x17aa, 0x1046, "Lenovo", "ThinkStation P620 Rear", "Lenovo-ThinkStation-P620-Rear"), /* Lenovo ThinkStation P620 Internal Speaker + Front Headset */ PROFILE_NAME(0x17aa, 0x104d, "Lenovo", "ThinkStation P620 Main", "Lenovo-ThinkStation-P620-Main"), /* Asrock TRX40 Creator */ PROFILE_NAME(0x26ce, 0x0a01, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7201, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7211, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7240, "Hauppauge", "HVR-850"), DEVICE_NAME(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7281, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x8200, "Hauppauge", "Woodbury"), { } /* terminator */ }; static const struct usb_audio_device_name * lookup_device_name(u32 id) { static const struct usb_audio_device_name *p; for (p = usb_audio_names; p->id; p++) if (p->id == id) return p; return NULL; } /* * free the chip instance * * here we have to do not much, since pcm and controls are already freed * */ static void snd_usb_audio_free(struct snd_card *card) { struct snd_usb_audio *chip = card->private_data; snd_usb_endpoint_free_all(chip); snd_usb_midi_v2_free_all(chip); mutex_destroy(&chip->mutex); if (!atomic_read(&chip->shutdown)) dev_set_drvdata(&chip->dev->dev, NULL); } static void usb_audio_make_shortname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; preset = lookup_device_name(chip->usb_id); if (preset && preset->product_name) s = preset->product_name; else if (quirk && quirk->product_name) s = quirk->product_name; if (s && *s) { strscpy(card->shortname, s, sizeof(card->shortname)); return; } /* retrieve the device string as shortname */ if (!dev->descriptor.iProduct || usb_string(dev, dev->descriptor.iProduct, card->shortname, sizeof(card->shortname)) <= 0) { /* no name available from anywhere, so use ID */ sprintf(card->shortname, "USB Device %#04x:%#04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); } strim(card->shortname); } static void usb_audio_make_longname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; int len; preset = lookup_device_name(chip->usb_id); /* shortcut - if any pre-defined string is given, use it */ if (preset && preset->profile_name) s = preset->profile_name; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); return; } if (preset && preset->vendor_name) s = preset->vendor_name; else if (quirk && quirk->vendor_name) s = quirk->vendor_name; *card->longname = 0; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); } else { /* retrieve the vendor and device strings as longname */ if 
(dev->descriptor.iManufacturer) usb_string(dev, dev->descriptor.iManufacturer, card->longname, sizeof(card->longname)); /* we don't really care if there isn't any vendor string */ } if (*card->longname) { strim(card->longname); if (*card->longname) strlcat(card->longname, " ", sizeof(card->longname)); } strlcat(card->longname, card->shortname, sizeof(card->longname)); len = strlcat(card->longname, " at ", sizeof(card->longname)); if (len < sizeof(card->longname)) usb_make_path(dev, card->longname + len, sizeof(card->longname) - len); switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: strlcat(card->longname, ", low speed", sizeof(card->longname)); break; case USB_SPEED_FULL: strlcat(card->longname, ", full speed", sizeof(card->longname)); break; case USB_SPEED_HIGH: strlcat(card->longname, ", high speed", sizeof(card->longname)); break; case USB_SPEED_SUPER: strlcat(card->longname, ", super speed", sizeof(card->longname)); break; case USB_SPEED_SUPER_PLUS: strlcat(card->longname, ", super speed plus", sizeof(card->longname)); break; default: break; } } /* * create a chip instance and set its names. */ static int snd_usb_audio_create(struct usb_interface *intf, struct usb_device *dev, int idx, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id, struct snd_usb_audio **rchip) { struct snd_card *card; struct snd_usb_audio *chip; int err; char component[14]; *rchip = NULL; switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: break; default: dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev)); return -ENXIO; } err = snd_card_new(&intf->dev, index[idx], id[idx], THIS_MODULE, sizeof(*chip), &card); if (err < 0) { dev_err(&dev->dev, "cannot create card instance %d\n", idx); return err; } chip = card->private_data; mutex_init(&chip->mutex); init_waitqueue_head(&chip->shutdown_wait); chip->index = idx; chip->dev = dev; chip->card = card; chip->setup = device_setup[idx]; chip->generic_implicit_fb = implicit_fb[idx]; chip->autoclock = autoclock; chip->lowlatency = lowlatency; atomic_set(&chip->active, 1); /* avoid autopm during probing */ atomic_set(&chip->usage_count, 0); atomic_set(&chip->shutdown, 0); chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); INIT_LIST_HEAD(&chip->iface_ref_list); INIT_LIST_HEAD(&chip->clock_ref_list); INIT_LIST_HEAD(&chip->midi_list); INIT_LIST_HEAD(&chip->midi_v2_list); INIT_LIST_HEAD(&chip->mixer_list); if (quirk_flags[idx]) chip->quirk_flags = quirk_flags[idx]; else snd_usb_init_quirk_flags(chip); card->private_free = snd_usb_audio_free; strcpy(card->driver, "USB-Audio"); sprintf(component, "USB%04x:%04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); snd_component_add(card, component); usb_audio_make_shortname(dev, chip, quirk); usb_audio_make_longname(dev, chip, quirk); snd_usb_audio_create_proc(chip); *rchip = chip; return 0; } /* look for a matching quirk alias id */ static bool get_alias_id(struct usb_device *dev, unsigned int *id) { int i; unsigned int src, dst; for (i = 0; i < ARRAY_SIZE(quirk_alias); i++) { if (!quirk_alias[i] || sscanf(quirk_alias[i], "%x:%x", &src, &dst) != 2 || src != *id) continue; dev_info(&dev->dev, "device (%04x:%04x): applying quirk alias %04x:%04x\n", USB_ID_VENDOR(*id), USB_ID_PRODUCT(*id), USB_ID_VENDOR(dst), USB_ID_PRODUCT(dst)); *id = dst; return true; } return false; } static int check_delayed_register_option(struct snd_usb_audio *chip) { int i; unsigned int 
id, inum; for (i = 0; i < ARRAY_SIZE(delayed_register); i++) { if (delayed_register[i] && sscanf(delayed_register[i], "%x:%x", &id, &inum) == 2 && id == chip->usb_id) return inum; } return -1; } static const struct usb_device_id usb_audio_ids[]; /* defined below */ /* look for the last interface that matches with our ids and remember it */ static void find_last_interface(struct snd_usb_audio *chip) { struct usb_host_config *config = chip->dev->actconfig; struct usb_interface *intf; int i; if (!config) return; for (i = 0; i < config->desc.bNumInterfaces; i++) { intf = config->interface[i]; if (usb_match_id(intf, usb_audio_ids)) chip->last_iface = intf->altsetting[0].desc.bInterfaceNumber; } usb_audio_dbg(chip, "Found last interface = %d\n", chip->last_iface); } /* look for the corresponding quirk */ static const struct snd_usb_audio_quirk * get_alias_quirk(struct usb_device *dev, unsigned int id) { const struct usb_device_id *p; for (p = usb_audio_ids; p->match_flags; p++) { /* FIXME: this checks only vendor:product pair in the list */ if ((p->match_flags & USB_DEVICE_ID_MATCH_DEVICE) == USB_DEVICE_ID_MATCH_DEVICE && p->idVendor == USB_ID_VENDOR(id) && p->idProduct == USB_ID_PRODUCT(id)) return (const struct snd_usb_audio_quirk *)p->driver_info; } return NULL; } /* register card if we reach to the last interface or to the specified * one given via option */ static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) { if (check_delayed_register_option(chip) == ifnum || chip->last_iface == ifnum || usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) return snd_card_register(chip->card); return 0; } /* * probe the active usb device * * note that this can be called multiple times per a device, when it * includes multiple audio control interfaces. * * thus we check the usb device pointer and creates the card instance * only at the first time. the successive calls of this function will * append the pcm interface to the corresponding card. */ static int usb_audio_probe(struct usb_interface *intf, const struct usb_device_id *usb_id) { struct usb_device *dev = interface_to_usbdev(intf); const struct snd_usb_audio_quirk *quirk = (const struct snd_usb_audio_quirk *)usb_id->driver_info; struct snd_usb_audio *chip; int i, err; struct usb_host_interface *alts; int ifnum; u32 id; alts = &intf->altsetting[0]; ifnum = get_iface_desc(alts)->bInterfaceNumber; id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); if (get_alias_id(dev, &id)) quirk = get_alias_quirk(dev, id); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) return -ENXIO; if (quirk && quirk->ifnum == QUIRK_NODEV_INTERFACE) return -ENODEV; err = snd_usb_apply_boot_quirk(dev, intf, quirk, id); if (err < 0) return err; /* * found a config. now register to ALSA */ /* check whether it's already registered */ chip = NULL; mutex_lock(&register_mutex); for (i = 0; i < SNDRV_CARDS; i++) { if (usb_chip[i] && usb_chip[i]->dev == dev) { if (atomic_read(&usb_chip[i]->shutdown)) { dev_err(&dev->dev, "USB device is in the shutdown state, cannot create a card instance\n"); err = -EIO; goto __error; } chip = usb_chip[i]; atomic_inc(&chip->active); /* avoid autopm */ break; } } if (! chip) { err = snd_usb_apply_boot_quirk_once(dev, intf, quirk, id); if (err < 0) goto __error; /* it's a fresh one. 
* now look for an empty slot and create a new card instance */ for (i = 0; i < SNDRV_CARDS; i++) if (!usb_chip[i] && (vid[i] == -1 || vid[i] == USB_ID_VENDOR(id)) && (pid[i] == -1 || pid[i] == USB_ID_PRODUCT(id))) { if (enable[i]) { err = snd_usb_audio_create(intf, dev, i, quirk, id, &chip); if (err < 0) goto __error; break; } else if (vid[i] != -1 || pid[i] != -1) { dev_info(&dev->dev, "device (%04x:%04x) is disabled\n", USB_ID_VENDOR(id), USB_ID_PRODUCT(id)); err = -ENOENT; goto __error; } } if (!chip) { dev_err(&dev->dev, "no available usb audio device\n"); err = -ENODEV; goto __error; } find_last_interface(chip); } if (chip->num_interfaces >= MAX_CARD_INTERFACES) { dev_info(&dev->dev, "Too many interfaces assigned to the single USB-audio card\n"); err = -EINVAL; goto __error; } dev_set_drvdata(&dev->dev, chip); if (ignore_ctl_error) chip->quirk_flags |= QUIRK_FLAG_IGNORE_CTL_ERROR; if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_disable_autosuspend(interface_to_usbdev(intf)); /* * For devices with more than one control interface, we assume the * first contains the audio controls. We might need a more specific * check here in the future. */ if (!chip->ctrl_intf) chip->ctrl_intf = alts; err = 1; /* continue */ if (quirk && quirk->ifnum != QUIRK_NO_INTERFACE) { /* need some special handlings */ err = snd_usb_create_quirk(chip, intf, &usb_audio_driver, quirk); if (err < 0) goto __error; } if (err > 0) { /* create normal USB audio interfaces */ err = snd_usb_create_streams(chip, ifnum); if (err < 0) goto __error; err = snd_usb_create_mixer(chip, ifnum); if (err < 0) goto __error; } if (chip->need_delayed_register) { dev_info(&dev->dev, "Found post-registration device assignment: %08x:%02x\n", chip->usb_id, ifnum); chip->need_delayed_register = false; /* clear again */ } err = try_to_register_card(chip, ifnum); if (err < 0) goto __error_no_register; if (chip->quirk_flags & QUIRK_FLAG_SHARE_MEDIA_DEVICE) { /* don't want to fail when snd_media_device_create() fails */ snd_media_device_create(chip, intf); } if (quirk) chip->quirk_type = quirk->type; usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; usb_set_intfdata(intf, chip); atomic_dec(&chip->active); mutex_unlock(&register_mutex); return 0; __error: /* in the case of error in secondary interface, still try to register */ if (chip) try_to_register_card(chip, ifnum); __error_no_register: if (chip) { /* chip->active is inside the chip->card object, * decrement before memory is possibly returned. */ atomic_dec(&chip->active); if (!chip->num_interfaces) snd_card_free(chip->card); } mutex_unlock(&register_mutex); return err; } /* * we need to take care of counter, since disconnection can be called also * many times as well as usb_audio_probe(). 
*/ static void usb_audio_disconnect(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_card *card; struct list_head *p; if (chip == USB_AUDIO_IFACE_UNUSED) return; card = chip->card; mutex_lock(&register_mutex); if (atomic_inc_return(&chip->shutdown) == 1) { struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; /* wait until all pending tasks done; * they are protected by snd_usb_lock_shutdown() */ wait_event(chip->shutdown_wait, !atomic_read(&chip->usage_count)); snd_card_disconnect(card); /* release the pcm resources */ list_for_each_entry(as, &chip->pcm_list, list) { snd_usb_stream_disconnect(as); } /* release the endpoint resources */ list_for_each_entry(ep, &chip->ep_list, list) { snd_usb_endpoint_release(ep); } /* release the midi resources */ list_for_each(p, &chip->midi_list) { snd_usbmidi_disconnect(p); } snd_usb_midi_v2_disconnect_all(chip); /* * Nice to check quirk && quirk->shares_media_device and * then call the snd_media_device_delete(). Don't have * access to the quirk here. snd_media_device_delete() * accesses mixer_list */ snd_media_device_delete(chip); /* release mixer resources */ list_for_each_entry(mixer, &chip->mixer_list, list) { snd_usb_mixer_disconnect(mixer); } } if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; if (chip->num_interfaces <= 0) { usb_chip[chip->index] = NULL; mutex_unlock(&register_mutex); snd_card_free_when_closed(card); } else { mutex_unlock(&register_mutex); } } /* lock the shutdown (disconnect) task and autoresume */ int snd_usb_lock_shutdown(struct snd_usb_audio *chip) { int err; atomic_inc(&chip->usage_count); if (atomic_read(&chip->shutdown)) { err = -EIO; goto error; } err = snd_usb_autoresume(chip); if (err < 0) goto error; return 0; error: if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); return err; } /* autosuspend and unlock the shutdown */ void snd_usb_unlock_shutdown(struct snd_usb_audio *chip) { snd_usb_autosuspend(chip); if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); } int snd_usb_autoresume(struct snd_usb_audio *chip) { int i, err; if (atomic_read(&chip->shutdown)) return -EIO; if (atomic_inc_return(&chip->active) != 1) return 0; for (i = 0; i < chip->num_interfaces; i++) { err = usb_autopm_get_interface(chip->intf[i]); if (err < 0) { /* rollback */ while (--i >= 0) usb_autopm_put_interface(chip->intf[i]); atomic_dec(&chip->active); return err; } } return 0; } void snd_usb_autosuspend(struct snd_usb_audio *chip) { int i; if (atomic_read(&chip->shutdown)) return; if (!atomic_dec_and_test(&chip->active)) return; for (i = 0; i < chip->num_interfaces; i++) usb_autopm_put_interface(chip->intf[i]); } static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; struct list_head *p; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; if (!chip->num_suspended_intf++) { list_for_each_entry(as, &chip->pcm_list, list) snd_usb_pcm_suspend(as); list_for_each_entry(ep, &chip->ep_list, list) snd_usb_endpoint_suspend(ep); list_for_each(p, &chip->midi_list) snd_usbmidi_suspend(p); list_for_each_entry(mixer, &chip->mixer_list, list) snd_usb_mixer_suspend(mixer); snd_usb_midi_v2_suspend_all(chip); } if (!PMSG_IS_AUTO(message) && !chip->system_suspend) { 
snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); chip->system_suspend = chip->num_suspended_intf; } return 0; } static int usb_audio_resume(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct usb_mixer_interface *mixer; struct list_head *p; int err = 0; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; atomic_inc(&chip->active); /* avoid autopm */ if (chip->num_suspended_intf > 1) goto out; list_for_each_entry(as, &chip->pcm_list, list) { err = snd_usb_pcm_resume(as); if (err < 0) goto err_out; } /* * ALSA leaves material resumption to user space * we just notify and restart the mixers */ list_for_each_entry(mixer, &chip->mixer_list, list) { err = snd_usb_mixer_resume(mixer); if (err < 0) goto err_out; } list_for_each(p, &chip->midi_list) { snd_usbmidi_resume(p); } snd_usb_midi_v2_resume_all(chip); out: if (chip->num_suspended_intf == chip->system_suspend) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D0); chip->system_suspend = 0; } chip->num_suspended_intf--; err_out: atomic_dec(&chip->active); /* allow autopm after this point */ return err; } static const struct usb_device_id usb_audio_ids [] = { #include "quirks-table.h" { .match_flags = (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS), .bInterfaceClass = USB_CLASS_AUDIO, .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_audio_ids); /* * entry point for linux usb interface */ static struct usb_driver usb_audio_driver = { .name = "snd-usb-audio", .probe = usb_audio_probe, .disconnect = usb_audio_disconnect, .suspend = usb_audio_suspend, .resume = usb_audio_resume, .reset_resume = usb_audio_resume, .id_table = usb_audio_ids, .supports_autosuspend = 1, }; module_usb_driver(usb_audio_driver);
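Throughout the probe path above, a device is identified by a single packed 32-bit value built from the USB vendor and product IDs, and the quirk_alias module option remaps one packed ID to another via sscanf("%x:%x", ...). The stand-alone sketch below mirrors that convention; the USB_ID()/USB_ID_VENDOR()/USB_ID_PRODUCT() definitions are assumptions modelled on sound/usb/usbaudio.h, and the alias string in main() is purely hypothetical.

/*
 * Stand-alone sketch (not kernel code): mimics how usb_audio_probe() packs
 * vendor/product into one 32-bit id and how get_alias_id() parses the
 * "srcid:dstid" quirk_alias strings with sscanf("%x:%x", ...).
 * The bit layout of USB_ID() is an assumption based on sound/usb/usbaudio.h.
 */
#include <stdio.h>
#include <stdint.h>

#define USB_ID(vendor, product) (((uint32_t)(vendor) << 16) | (product))
#define USB_ID_VENDOR(id)       ((uint16_t)((id) >> 16))
#define USB_ID_PRODUCT(id)      ((uint16_t)((id) & 0xffff))

/* Parse one "xxxxxxxx:yyyyyyyy" alias entry; return 1 and remap *id on a match. */
static int apply_alias(const char *opt, uint32_t *id)
{
	unsigned int src, dst;

	if (sscanf(opt, "%x:%x", &src, &dst) != 2 || src != *id)
		return 0;
	*id = dst;
	return 1;
}

int main(void)
{
	uint32_t id = USB_ID(0x2040, 0x7200);	/* Hauppauge HVR-950Q from the table above */

	if (apply_alias("20407200:2040721e", &id))	/* hypothetical alias entry */
		printf("remapped to %04x:%04x\n",
		       USB_ID_VENDOR(id), USB_ID_PRODUCT(id));
	return 0;
}

Packing both halves into one integer keeps the name-table lookups and the module-option parsing to a single compare, which is why the driver carries the packed form around instead of separate vendor/product fields.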
22 15 22 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 /* * linux/fs/nls/nls_cp874.c * * Charset cp874 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2026, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 0x90*/ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 0xa0*/ 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, /* 0xb0*/ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, /* 0xc0*/ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, /* 0xd0*/ 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, /* 0xe0*/ 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, /* 0xf0*/ 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 
0x0e57, 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char page0e[256] = { 0x00, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x00-0x07 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0x08-0x0f */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0x10-0x17 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0x18-0x1f */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0x20-0x27 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0x28-0x2f */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0x30-0x37 */ 0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0x38-0x3f */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0x40-0x47 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0x48-0x4f */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0x50-0x57 */ 0xf8, 0xf9, 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x91, 0x92, 0x00, 0x00, 0x93, 0x94, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page0e, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 
0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 
0xfb, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp874", .alias = "tis-620", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp874(void) { return register_nls(&table); } static void __exit exit_nls_cp874(void) { unregister_nls(&table); } module_init(init_nls_cp874) module_exit(exit_nls_cp874) MODULE_DESCRIPTION("NLS Thai charset (CP874, TIS-620)"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NLS(tis-620);
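char2uni() above is a flat 256-entry lookup, while uni2char() first selects a page table by the high byte of the code point and then indexes it with the low byte; a zero entry means "no mapping". The stand-alone sketch below reproduces that two-level lookup with a tiny, hand-filled excerpt of page 0x0e (demo_page0e and demo_uni2char are illustrative names, and the boundlen check of the real function is omitted).

/*
 * Stand-alone sketch of the two-level uni2char lookup used by the NLS
 * tables above.  Only a few entries of page 0x0e are filled in here;
 * the full tables live in nls_cp874.c itself.
 */
#include <stdio.h>

typedef unsigned short wchar16;	/* stand-in for the kernel's wchar_t */

static const unsigned char demo_page0e[256] = {
	[0x01] = 0xa1,	/* U+0E01 (ko kai)    -> 0xa1 */
	[0x02] = 0xa2,	/* U+0E02 (kho khai)  -> 0xa2 */
	[0x3f] = 0xdf,	/* U+0E3F (baht sign) -> 0xdf */
};

static const unsigned char *const demo_pages[256] = {
	[0x0e] = demo_page0e,
};

/* Mirrors uni2char(): returns 1 on success, -1 if there is no mapping. */
static int demo_uni2char(wchar16 uni, unsigned char *out)
{
	unsigned char cl = uni & 0x00ff;
	unsigned char ch = (uni & 0xff00) >> 8;
	const unsigned char *page = demo_pages[ch];

	if (page && page[cl]) {
		*out = page[cl];
		return 1;
	}
	return -1;
}

int main(void)
{
	unsigned char c;

	if (demo_uni2char(0x0e01, &c) == 1)
		printf("U+0E01 -> 0x%02x\n", c);	/* prints 0xa1 */
	return 0;
}

The page indirection keeps the reverse table sparse: for cp874 only pages 0x00, 0x0e and 0x20 need storage, and every other page pointer stays NULL.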
// SPDX-License-Identifier: GPL-2.0-only
/*
 * TCP Low Priority (TCP-LP)
 *
 * TCP Low Priority is a distributed algorithm whose goal is to utilize only
 * the excess network bandwidth as compared to the ``fair share`` of
 * bandwidth as targeted by TCP.
 *
 * As of 2.6.13, Linux supports pluggable congestion control algorithms.
 * Due to the limitation of the API, we take the following changes from
 * the original TCP-LP implementation:
 *   o We use newReno in most core CA handling. Only add some checking
 *     within cong_avoid.
 *   o Error correcting in remote HZ, therefore the remote HZ estimate is
 *     kept under continuous checking and updating.
 *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
 *     OWD has a similar meaning to RTT. Also correct the buggy formula.
 *   o Handle reaction for Early Congestion Indication (ECI) within
 *     pkts_acked, as mentioned within pseudo code.
 *   o OWD is handled in relative format, where the local time stamp will be
 *     in tcp_time_stamp format.
 *
 * Original Author:
 *   Aleksandar Kuzmanovic <akuzma@northwestern.edu>
 * Available from:
 *   http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
 * Original implementation for 2.4.19:
 *   http://www-ece.rice.edu/networks/TCP-LP/
 *
 * 2.6.x module Authors:
 *   Wong Hoi Sing, Edison <hswong3i@gmail.com>
 *   Hung Hing Lun, Mike <hlhung3i@gmail.com>
 * SourceForge project page:
 *   http://tcp-lp-mod.sourceforge.net/
 */

#include <linux/module.h>
#include <net/tcp.h>

/* resolution of owd */
#define LP_RESOL	TCP_TS_HZ

/**
 * enum tcp_lp_state
 * @LP_VALID_RHZ: is remote HZ valid?
 * @LP_VALID_OWD: is OWD valid?
 * @LP_WITHIN_THR: are we within threshold?
 * @LP_WITHIN_INF: are we within inference?
 *
 * TCP-LP's state flags.
 * We create this set of state flags mainly for debugging.

*/ enum tcp_lp_state { LP_VALID_RHZ = (1 << 0), LP_VALID_OWD = (1 << 1), LP_WITHIN_THR = (1 << 3), LP_WITHIN_INF = (1 << 4), }; /** * struct lp * @flag: TCP-LP state flag * @sowd: smoothed OWD << 3 * @owd_min: min OWD * @owd_max: max OWD * @owd_max_rsv: reserved max owd * @remote_hz: estimated remote HZ * @remote_ref_time: remote reference time * @local_ref_time: local reference time * @last_drop: time for last active drop * @inference: current inference * * TCP-LP's private struct. * We get the idea from original TCP-LP implementation where only left those we * found are really useful. */ struct lp { u32 flag; u32 sowd; u32 owd_min; u32 owd_max; u32 owd_max_rsv; u32 remote_hz; u32 remote_ref_time; u32 local_ref_time; u32 last_drop; u32 inference; }; /** * tcp_lp_init * @sk: socket to initialize congestion control algorithm for * * Init all required variables. * Clone the handling from Vegas module implementation. */ static void tcp_lp_init(struct sock *sk) { struct lp *lp = inet_csk_ca(sk); lp->flag = 0; lp->sowd = 0; lp->owd_min = 0xffffffff; lp->owd_max = 0; lp->owd_max_rsv = 0; lp->remote_hz = 0; lp->remote_ref_time = 0; lp->local_ref_time = 0; lp->last_drop = 0; lp->inference = 0; } /** * tcp_lp_cong_avoid * @sk: socket to avoid congesting * * Implementation of cong_avoid. * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) tcp_reno_cong_avoid(sk, ack, acked); } /** * tcp_lp_remote_hz_estimator * @sk: socket which needs an estimate for the remote HZs * * Estimate remote HZ. * We keep on updating the estimated value, where original TCP-LP * implementation only guest it for once and use forever. */ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ s64 m = 0; /* not yet record reference time * go away!! record it before come back!! */ if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) goto out; /* we can't calc remote HZ with no different!! */ if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || tp->rx_opt.rcv_tsecr == lp->local_ref_time) goto out; m = TCP_TS_HZ * (tp->rx_opt.rcv_tsval - lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - lp->local_ref_time); if (m < 0) m = -m; if (rhz > 0) { m -= rhz >> 6; /* m is now error in remote HZ est */ rhz += m; /* 63/64 old + 1/64 new */ } else rhz = m << 6; out: /* record time for successful remote HZ calc */ if ((rhz >> 6) > 0) lp->flag |= LP_VALID_RHZ; else lp->flag &= ~LP_VALID_RHZ; /* record reference time stamp */ lp->remote_ref_time = tp->rx_opt.rcv_tsval; lp->local_ref_time = tp->rx_opt.rcv_tsecr; return rhz >> 6; } /** * tcp_lp_owd_calculator * @sk: socket to calculate one way delay for * * Calculate one way delay (in relative format). * Original implement OWD as minus of remote time difference to local time * difference directly. As this time difference just simply equal to RTT, when * the network status is stable, remote RTT will equal to local RTT, and result * OWD into zero. * It seems to be a bug and so we fixed it. 
*/ static u32 tcp_lp_owd_calculator(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); s64 owd = 0; lp->remote_hz = tcp_lp_remote_hz_estimator(sk); if (lp->flag & LP_VALID_RHZ) { owd = tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ); if (owd < 0) owd = -owd; } if (owd > 0) lp->flag |= LP_VALID_OWD; else lp->flag &= ~LP_VALID_OWD; return owd; } /** * tcp_lp_rtt_sample * @sk: socket to add a rtt sample to * @rtt: round trip time, which is ignored! * * Implementation or rtt_sample. * Will take the following action, * 1. calc OWD, * 2. record the min/max OWD, * 3. calc smoothed OWD (SOWD). * Most ideas come from the original TCP-LP implementation. */ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) { struct lp *lp = inet_csk_ca(sk); s64 mowd = tcp_lp_owd_calculator(sk); /* sorry that we don't have valid data */ if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) return; /* record the next min owd */ if (mowd < lp->owd_min) lp->owd_min = mowd; /* always forget the max of the max * we just set owd_max as one below it */ if (mowd > lp->owd_max) { if (mowd > lp->owd_max_rsv) { if (lp->owd_max_rsv == 0) lp->owd_max = mowd; else lp->owd_max = lp->owd_max_rsv; lp->owd_max_rsv = mowd; } else lp->owd_max = mowd; } /* calc for smoothed owd */ if (lp->sowd != 0) { mowd -= lp->sowd >> 3; /* m is now error in owd est */ lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ } else lp->sowd = mowd << 3; /* take the measured time be owd */ } /** * tcp_lp_pkts_acked * @sk: socket requiring congestion avoidance calculations * * Implementation of pkts_acked. * Deal with active drop under Early Congestion Indication. * Only drop to half and 1 will be handle, because we hope to use back * newReno in increase case. 
* We work it out by following the idea from TCP-LP's paper directly */ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); u32 now = tcp_time_stamp_ts(tp); u32 delta; if (sample->rtt_us > 0) tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ delta = now - tp->rx_opt.rcv_tsecr; if ((s32)delta > 0) lp->inference = 3 * delta; /* test if within inference */ if (lp->last_drop && (now - lp->last_drop < lp->inference)) lp->flag |= LP_WITHIN_INF; else lp->flag &= ~LP_WITHIN_INF; /* test if within threshold */ if (lp->sowd >> 3 < lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) lp->flag |= LP_WITHIN_THR; else lp->flag &= ~LP_WITHIN_THR; pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max, lp->sowd >> 3); if (lp->flag & LP_WITHIN_THR) return; /* FIXME: try to reset owd_min and owd_max here * so decrease the chance the min/max is no longer suitable * and will usually within threshold when within inference */ lp->owd_min = lp->sowd >> 3; lp->owd_max = lp->sowd >> 2; lp->owd_max_rsv = lp->sowd >> 2; /* happened within inference * drop snd_cwnd into 1 */ if (lp->flag & LP_WITHIN_INF) tcp_snd_cwnd_set(tp, 1U); /* happened after inference * cut snd_cwnd into half */ else tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U)); /* record this drop time */ lp->last_drop = now; } static struct tcp_congestion_ops tcp_lp __read_mostly = { .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_lp_cong_avoid, .pkts_acked = tcp_lp_pkts_acked, .owner = THIS_MODULE, .name = "lp" }; static int __init tcp_lp_register(void) { BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_lp); } static void __exit tcp_lp_unregister(void) { tcp_unregister_congestion_control(&tcp_lp); } module_init(tcp_lp_register); module_exit(tcp_lp_unregister); MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Low Priority");
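tcp_lp_rtt_sample() above keeps the smoothed OWD in fixed point: lp->sowd holds the value scaled by 8, so "mowd -= sowd >> 3; sowd += mowd" amounts to new = 7/8 * old + 1/8 * sample using integer arithmetic only, the same trick classic TCP uses for SRTT. A minimal stand-alone sketch of just that update follows; the sample values in main() are made up.

/*
 * Stand-alone sketch of the fixed-point smoothing used for lp->sowd above:
 * the state variable keeps the value scaled by 8 (<< 3), so the update
 * "sowd += mowd - (sowd >> 3)" is an EWMA with gain 1/8 done in integers.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t sowd;	/* smoothed OWD << 3, 0 means "not initialised" */

static void sowd_update(int64_t mowd)
{
	if (sowd != 0) {
		mowd -= sowd >> 3;	/* error vs. current estimate */
		sowd += mowd;		/* new = 7/8 old + 1/8 sample */
	} else {
		sowd = mowd << 3;	/* first sample seeds the average */
	}
}

int main(void)
{
	/* hypothetical OWD samples in timestamp ticks */
	const int64_t samples[] = { 800, 800, 900, 1200, 850 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		sowd_update(samples[i]);
		printf("sample %lld -> smoothed %u\n",
		       (long long)samples[i], sowd >> 3);
	}
	return 0;
}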
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/hfsplus/btree.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handle opening/closing btree
 */

#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/log2.h>

#include "hfsplus_fs.h"
#include "hfsplus_raw.h"

/*
 * The initial source code of the clump size calculation was taken from
 * http://opensource.apple.com/tarballs/diskdev_cmds/
 */
#define CLUMP_ENTRIES	15

static short clumptbl[CLUMP_ENTRIES * 3] = {
/*
 *	    Volume	Attributes	 Catalog	 Extents
 *	     Size	Clump (MB)	Clump (MB)	Clump (MB)
 */
	/*   1GB */	  4,		  4,		 4,
	/*   2GB */	  6,		  6,		 4,
	/*   4GB */	  8,		  8,		 4,
	/*   8GB */	 11,		 11,		 5,
	/*
	 * For volumes 16GB and larger, we want to make sure that a full OS
	 * install won't require fragmentation of the Catalog or Attributes
	 * B-trees. We do this by making the clump sizes sufficiently large,
	 * and by leaving a gap after the B-trees for them to grow into.
	 *
	 * For SnowLeopard 10A298, a FullNetInstall with all packages selected
	 * results in:
	 * Catalog B-tree Header
	 *	nodeSize:    8192
	 *	totalNodes: 31616
	 *	freeNodes:   1978
	 * (used = 231.55 MB)
	 * Attributes B-tree Header
	 *	nodeSize:    8192
	 *	totalNodes: 63232
	 *	freeNodes:    958
	 * (used = 486.52 MB)
	 *
	 * We also want Time Machine backup volumes to have a sufficiently
	 * large clump size to reduce fragmentation.
	 *
	 * The series of numbers for Catalog and Attributes form a geometric
	 * series.
For Catalog (16GB to 512GB), each term is 8**(1/5) times * the previous term. For Attributes (16GB to 512GB), each term is * 4**(1/5) times the previous term. For 1TB to 16TB, each term is * 2**(1/5) times the previous term. */ /* 16GB */ 64, 32, 5, /* 32GB */ 84, 49, 6, /* 64GB */ 111, 74, 7, /* 128GB */ 147, 111, 8, /* 256GB */ 194, 169, 9, /* 512GB */ 256, 256, 11, /* 1TB */ 294, 294, 14, /* 2TB */ 338, 338, 16, /* 4TB */ 388, 388, 20, /* 8TB */ 446, 446, 25, /* 16TB */ 512, 512, 32 }; u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, u64 sectors, int file_id) { u32 mod = max(node_size, block_size); u32 clump_size; int column; int i; /* Figure out which column of the above table to use for this file. */ switch (file_id) { case HFSPLUS_ATTR_CNID: column = 0; break; case HFSPLUS_CAT_CNID: column = 1; break; default: column = 2; break; } /* * The default clump size is 0.8% of the volume size. And * it must also be a multiple of the node and block size. */ if (sectors < 0x200000) { clump_size = sectors << 2; /* 0.8 % */ if (clump_size < (8 * node_size)) clump_size = 8 * node_size; } else { /* turn exponent into table index... */ for (i = 0, sectors = sectors >> 22; sectors && (i < CLUMP_ENTRIES - 1); ++i, sectors = sectors >> 1) { /* empty body */ } clump_size = clumptbl[column + (i) * 3] * 1024 * 1024; } /* * Round the clump size to a multiple of node and block size. * NOTE: This rounds down. */ clump_size /= mod; clump_size *= mod; /* * Rounding down could have rounded down to 0 if the block size was * greater than the clump size. If so, just use one block or node. */ if (clump_size == 0) clump_size = mod; return clump_size; } /* Get a reference to a B*Tree and do some initial checks */ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) { struct hfs_btree *tree; struct hfs_btree_header_rec *head; struct address_space *mapping; struct inode *inode; struct page *page; unsigned int size; tree = kzalloc(sizeof(*tree), GFP_KERNEL); if (!tree) return NULL; mutex_init(&tree->tree_lock); spin_lock_init(&tree->hash_lock); tree->sb = sb; tree->cnid = id; inode = hfsplus_iget(sb, id); if (IS_ERR(inode)) goto free_tree; tree->inode = inode; if (!HFSPLUS_I(tree->inode)->first_blocks) { pr_err("invalid btree extent records (0 size)\n"); goto free_inode; } mapping = tree->inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) goto free_inode; /* Load the header */ head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + sizeof(struct hfs_bnode_desc)); tree->root = be32_to_cpu(head->root); tree->leaf_count = be32_to_cpu(head->leaf_count); tree->leaf_head = be32_to_cpu(head->leaf_head); tree->leaf_tail = be32_to_cpu(head->leaf_tail); tree->node_count = be32_to_cpu(head->node_count); tree->free_nodes = be32_to_cpu(head->free_nodes); tree->attributes = be32_to_cpu(head->attributes); tree->node_size = be16_to_cpu(head->node_size); tree->max_key_len = be16_to_cpu(head->max_key_len); tree->depth = be16_to_cpu(head->depth); /* Verify the tree and set the correct compare function */ switch (id) { case HFSPLUS_EXT_CNID: if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { pr_err("invalid extent max_key_len %d\n", tree->max_key_len); goto fail_page; } if (tree->attributes & HFS_TREE_VARIDXKEYS) { pr_err("invalid extent btree flag\n"); goto fail_page; } tree->keycmp = hfsplus_ext_cmp_key; break; case HFSPLUS_CAT_CNID: if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { pr_err("invalid catalog max_key_len %d\n", tree->max_key_len); goto 
fail_page; } if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { pr_err("invalid catalog btree flag\n"); goto fail_page; } if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && (head->key_type == HFSPLUS_KEY_BINARY)) tree->keycmp = hfsplus_cat_bin_cmp_key; else { tree->keycmp = hfsplus_cat_case_cmp_key; set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); } break; case HFSPLUS_ATTR_CNID: if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) { pr_err("invalid attributes max_key_len %d\n", tree->max_key_len); goto fail_page; } tree->keycmp = hfsplus_attr_bin_cmp_key; break; default: pr_err("unknown B*Tree requested\n"); goto fail_page; } if (!(tree->attributes & HFS_TREE_BIGKEYS)) { pr_err("invalid btree flag\n"); goto fail_page; } size = tree->node_size; if (!is_power_of_2(size)) goto fail_page; if (!tree->node_count) goto fail_page; tree->node_size_shift = ffs(size) - 1; tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT; kunmap_local(head); put_page(page); return tree; fail_page: kunmap_local(head); put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfsplus_aops; iput(tree->inode); free_tree: kfree(tree); return NULL; } /* Release resources used by a btree */ void hfs_btree_close(struct hfs_btree *tree) { struct hfs_bnode *node; int i; if (!tree) return; for (i = 0; i < NODE_HASH_SIZE; i++) { while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) pr_crit("node %d:%d " "still has %d user(s)!\n", node->tree->cnid, node->this, atomic_read(&node->refcnt)); hfs_bnode_free(node); tree->node_hash_cnt--; } } iput(tree->inode); kfree(tree); } int hfs_btree_write(struct hfs_btree *tree) { struct hfs_btree_header_rec *head; struct hfs_bnode *node; struct page *page; node = hfs_bnode_find(tree, 0); if (IS_ERR(node)) /* panic? 
*/ return -EIO; /* Load the header */ page = node->page[0]; head = (struct hfs_btree_header_rec *)(kmap_local_page(page) + sizeof(struct hfs_bnode_desc)); head->root = cpu_to_be32(tree->root); head->leaf_count = cpu_to_be32(tree->leaf_count); head->leaf_head = cpu_to_be32(tree->leaf_head); head->leaf_tail = cpu_to_be32(tree->leaf_tail); head->node_count = cpu_to_be32(tree->node_count); head->free_nodes = cpu_to_be32(tree->free_nodes); head->attributes = cpu_to_be32(tree->attributes); head->depth = cpu_to_be16(tree->depth); kunmap_local(head); set_page_dirty(page); hfs_bnode_put(node); return 0; } static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) { struct hfs_btree *tree = prev->tree; struct hfs_bnode *node; struct hfs_bnode_desc desc; __be32 cnid; node = hfs_bnode_create(tree, idx); if (IS_ERR(node)) return node; tree->free_nodes--; prev->next = idx; cnid = cpu_to_be32(idx); hfs_bnode_write(prev, &cnid, offsetof(struct hfs_bnode_desc, next), 4); node->type = HFS_NODE_MAP; node->num_recs = 1; hfs_bnode_clear(node, 0, tree->node_size); desc.next = 0; desc.prev = 0; desc.type = HFS_NODE_MAP; desc.height = 0; desc.num_recs = cpu_to_be16(1); desc.reserved = 0; hfs_bnode_write(node, &desc, 0, sizeof(desc)); hfs_bnode_write_u16(node, 14, 0x8000); hfs_bnode_write_u16(node, tree->node_size - 2, 14); hfs_bnode_write_u16(node, tree->node_size - 4, tree->node_size - 6); return node; } /* Make sure @tree has enough space for the @rsvd_nodes */ int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes) { struct inode *inode = tree->inode; struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 count; int res; if (rsvd_nodes <= 0) return 0; while (tree->free_nodes < rsvd_nodes) { res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); if (res) return res; hip->phys_size = inode->i_size = (loff_t)hip->alloc_blocks << HFSPLUS_SB(tree->sb)->alloc_blksz_shift; hip->fs_blocks = hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; inode_set_bytes(inode, inode->i_size); count = inode->i_size >> tree->node_size_shift; tree->free_nodes += count - tree->node_count; tree->node_count = count; } return 0; } struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) { struct hfs_bnode *node, *next_node; struct page **pagep; u32 nidx, idx; unsigned off; u16 off16; u16 len; u8 *data, byte, m; int i, res; res = hfs_bmap_reserve(tree, 1); if (res) return ERR_PTR(res); nidx = 0; node = hfs_bnode_find(tree, nidx); if (IS_ERR(node)) return node; len = hfs_brec_lenoff(node, 2, &off16); off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); data = kmap_local_page(*pagep); off &= ~PAGE_MASK; idx = 0; for (;;) { while (len) { byte = data[off]; if (byte != 0xff) { for (m = 0x80, i = 0; i < 8; m >>= 1, i++) { if (!(byte & m)) { idx += i; data[off] |= m; set_page_dirty(*pagep); kunmap_local(data); tree->free_nodes--; mark_inode_dirty(tree->inode); hfs_bnode_put(node); return hfs_bnode_create(tree, idx); } } } if (++off >= PAGE_SIZE) { kunmap_local(data); data = kmap_local_page(*++pagep); off = 0; } idx += 8; len--; } kunmap_local(data); nidx = node->next; if (!nidx) { hfs_dbg(BNODE_MOD, "create new bmap node\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); hfs_bnode_put(node); if (IS_ERR(next_node)) return next_node; node = next_node; len = hfs_brec_lenoff(node, 0, &off16); off = off16; off += node->page_offset; pagep = node->page + (off >> PAGE_SHIFT); data = kmap_local_page(*pagep); off &= ~PAGE_MASK; } } void 
hfs_bmap_free(struct hfs_bnode *node) { struct hfs_btree *tree; struct page *page; u16 off, len; u32 nidx; u8 *data, byte, m; hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this); BUG_ON(!node->this); tree = node->tree; nidx = node->this; node = hfs_bnode_find(tree, 0); if (IS_ERR(node)) return; len = hfs_brec_lenoff(node, 2, &off); while (nidx >= len * 8) { u32 i; nidx -= len * 8; i = node->next; if (!i) { /* panic */; pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); hfs_bnode_put(node); return; } hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; if (node->type != HFS_NODE_MAP) { /* panic */; pr_crit("invalid bmap found! " "(%u,%d)\n", node->this, node->type); hfs_bnode_put(node); return; } len = hfs_brec_lenoff(node, 0, &off); } off += node->page_offset + nidx / 8; page = node->page[off >> PAGE_SHIFT]; data = kmap_local_page(page); off &= ~PAGE_MASK; m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { pr_crit("trying to free free bnode " "%u(%d)\n", node->this, node->type); kunmap_local(data); hfs_bnode_put(node); return; } data[off] = byte & ~m; set_page_dirty(page); kunmap_local(data); hfs_bnode_put(node); tree->free_nodes++; mark_inode_dirty(tree->inode); }
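hfsplus_calc_btree_clump_size() above picks a clump size either as roughly 0.8% of a small volume or from clumptbl, deriving the table row by repeatedly halving sectors >> 22, and finally rounds the result down to a multiple of max(node_size, block_size), falling back to that multiple itself if the rounding reaches zero. The user-space sketch below replays only that arithmetic; demo_catalog_mb is a stand-in holding just the Catalog column of clumptbl and demo_clump_size is an illustrative name, not the kernel function.

/*
 * User-space sketch of the table-index and rounding arithmetic used by
 * hfsplus_calc_btree_clump_size() above.  The table below is only a
 * stand-in for the Catalog column of clumptbl (values in MB).
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_ENTRIES 15
/* catalog clump sizes (MB) for 1GB, 2GB, 4GB, ... 16TB volumes */
static const unsigned demo_catalog_mb[DEMO_ENTRIES] = {
	4, 6, 8, 11, 32, 49, 74, 111, 169, 256, 294, 338, 388, 446, 512
};

static uint32_t demo_clump_size(uint32_t block_size, uint32_t node_size,
				uint64_t sectors)
{
	uint32_t mod = node_size > block_size ? node_size : block_size;
	uint32_t clump_size;
	int i;

	if (sectors < 0x200000) {	/* below 1GB: roughly 0.8% of the volume */
		clump_size = sectors << 2;
		if (clump_size < 8 * node_size)
			clump_size = 8 * node_size;
	} else {
		/* turn the size exponent into a table index */
		for (i = 0, sectors >>= 22;
		     sectors && i < DEMO_ENTRIES - 1;
		     ++i, sectors >>= 1)
			;
		clump_size = demo_catalog_mb[i] * 1024 * 1024;
	}

	/* round down to a multiple of node and block size; never zero */
	clump_size -= clump_size % mod;
	return clump_size ? clump_size : mod;
}

int main(void)
{
	/* 64GB volume (in 512-byte sectors) with 4KB blocks and 8KB catalog nodes */
	printf("clump = %u bytes\n",
	       demo_clump_size(4096, 8192, (64ULL << 30) >> 9));
	return 0;
}

For a 64GB volume with 8KB catalog nodes this lands on the 74MB table entry, which is already a multiple of the node size, so the rounding step leaves it unchanged.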
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007 * * Author: Eric Biederman <ebiederm@xmision.com> */ #include <linux/module.h> #include <linux/ipc.h> #include <linux/nsproxy.h> #include <linux/sysctl.h> #include <linux/uaccess.h> #include <linux/capability.h> #include <linux/ipc_namespace.h> #include <linux/msg.h> #include <linux/slab.h> #include <linux/cred.h> #include "util.h" static int proc_ipc_dointvec_minmax_orphans(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = container_of(table->data, struct ipc_namespace, shm_rmid_forced); int err; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err < 0) return err; if (ns->shm_rmid_forced) shm_destroy_orphaned(ns); return err; } static int proc_ipc_auto_msgmni(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; int dummy = 0; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = &dummy; if (write) pr_info_once("writing to auto_msgmni has no effect"); return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); } static int proc_ipc_sem_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = container_of(table->data, struct ipc_namespace, sem_ctls); int ret, semmni; semmni = ns->sem_ctls[3]; ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret) ret = sem_check_semmni(ns); /* * Reset the semmni value if an error happens.
*/ if (ret) ns->sem_ctls[3] = semmni; return ret; } int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; static const struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen = sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, { .procname = "shm_rmid_forced", .data = &init_ipc_ns.shm_rmid_forced, .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax_orphans, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof(init_ipc_ns.msg_ctlmni), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, { .procname = "auto_msgmni", .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_ipc_auto_msgmni, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "sem", .data = &init_ipc_ns.sem_ctls, .maxlen = 4*sizeof(int), .mode = 0644, .proc_handler = proc_ipc_sem_dointvec, }, #ifdef CONFIG_CHECKPOINT_RESTORE { .procname = "sem_next_id", .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), .mode = 0444, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "msg_next_id", .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), .mode = 0444, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "shm_next_id", .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), .mode = 0444, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, #endif }; static struct ctl_table_set *set_lookup(struct ctl_table_root *root) { return &current->nsproxy->ipc_ns->ipc_set; } static int set_is_seen(struct ctl_table_set *set) { return &current->nsproxy->ipc_ns->ipc_set == set; } static void ipc_set_ownership(struct ctl_table_header *head, kuid_t *uid, kgid_t *gid) { struct ipc_namespace *ns = container_of(head->set, struct ipc_namespace, ipc_set); kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; } static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table *table) { int mode = table->mode; #ifdef CONFIG_CHECKPOINT_RESTORE struct ipc_namespace *ns = container_of(head->set, struct ipc_namespace, ipc_set); if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || (table->data == &ns->ids[IPC_MSG_IDS].next_id) || (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && checkpoint_restore_ns_capable(ns->user_ns)) mode = 0666; else #endif { kuid_t ns_root_uid; kgid_t ns_root_gid; ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); if (uid_eq(current_euid(), ns_root_uid)) mode >>= 6; else if (in_egroup_p(ns_root_gid)) mode >>= 3; } mode &= 7; return (mode << 6) | (mode << 3) | mode; } static struct ctl_table_root set_root = { .lookup = set_lookup, .permissions = ipc_permissions, .set_ownership = ipc_set_ownership, }; bool setup_ipc_sysctls(struct ipc_namespace *ns) { struct ctl_table *tbl; setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); if (tbl) { int i; for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { if (tbl[i].data == &init_ipc_ns.shm_ctlmax) tbl[i].data = &ns->shm_ctlmax; else if (tbl[i].data == &init_ipc_ns.shm_ctlall) tbl[i].data = &ns->shm_ctlall; else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) tbl[i].data = &ns->shm_ctlmni; else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) tbl[i].data = &ns->shm_rmid_forced; else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) tbl[i].data = &ns->msg_ctlmax; else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) tbl[i].data = &ns->msg_ctlmni; else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) tbl[i].data = &ns->msg_ctlmnb; else if (tbl[i].data == &init_ipc_ns.sem_ctls) tbl[i].data = &ns->sem_ctls; #ifdef CONFIG_CHECKPOINT_RESTORE else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; #endif else tbl[i].data = NULL; } ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl, ARRAY_SIZE(ipc_sysctls)); } if (!ns->ipc_sysctls) { kfree(tbl); retire_sysctl_set(&ns->ipc_set); return false; } return true; } void retire_ipc_sysctls(struct ipc_namespace *ns) { const struct ctl_table *tbl; tbl = ns->ipc_sysctls->ctl_table_arg; unregister_sysctl_table(ns->ipc_sysctls); retire_sysctl_set(&ns->ipc_set); kfree(tbl); } static int __init ipc_sysctl_init(void) { if (!setup_ipc_sysctls(&init_ipc_ns)) { pr_warn("ipc sysctl registration failed\n"); return -ENOMEM; } return 0; } device_initcall(ipc_sysctl_init); static int __init ipc_mni_extend(char *str) { ipc_mni = IPCMNI_EXTEND; ipc_mni_shift = IPCMNI_EXTEND_SHIFT; ipc_min_cycle = IPCMNI_EXTEND_MIN_CYCLE; pr_info("IPCMNI extended to %d.\n", ipc_mni); return 0; } early_param("ipcmni_extend", ipc_mni_extend);
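The table above is registered under "kernel", so the limits it guards appear as /proc/sys/kernel/<name> and are serviced by the proc handlers shown (proc_dointvec_minmax and friends). A small user-space sketch that reads two of them, assuming procfs is mounted at /proc:

#include <stdio.h>
#include <stdlib.h>

/* Read one of the IPC sysctls registered above, e.g. "shmmni" or "msgmnb". */
static long read_ipc_sysctl(const char *name)
{
	char path[128], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return strtol(buf, NULL, 10);
}

int main(void)
{
	printf("shmmni = %ld\n", read_ipc_sysctl("shmmni"));
	printf("msgmnb = %ld\n", read_ipc_sysctl("msgmnb"));
	return 0;
}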
// SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual PTP 1588 clock for use with KVM guests * * Copyright (C) 2017 Red Hat Inc. */ #include <linux/device.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/ptp_kvm.h> #include <uapi/linux/kvm_para.h> #include <asm/kvm_para.h> #include <uapi/asm/kvm_para.h> #include <linux/ptp_clock_kernel.h> struct kvm_ptp_clock { struct ptp_clock *ptp_clock; struct ptp_clock_info caps; }; static DEFINE_SPINLOCK(kvm_ptp_lock); static int ptp_kvm_get_time_fn(ktime_t *device_time, struct system_counterval_t *system_counter, void *ctx) { enum clocksource_ids cs_id; struct timespec64 tspec; u64 cycle; int ret; spin_lock(&kvm_ptp_lock); preempt_disable_notrace(); ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs_id); if (ret) { spin_unlock(&kvm_ptp_lock); preempt_enable_notrace(); return ret; } preempt_enable_notrace(); system_counter->cycles = cycle; system_counter->cs_id = cs_id; *device_time = timespec64_to_ktime(tspec); spin_unlock(&kvm_ptp_lock); return 0; } static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp, struct system_device_crosststamp *xtstamp) { return get_device_system_crosststamp(ptp_kvm_get_time_fn, NULL, NULL, xtstamp); } /* * PTP clock operations */ static int ptp_kvm_adjfine(struct ptp_clock_info *ptp, long delta) { return -EOPNOTSUPP; } static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta) { return -EOPNOTSUPP; } static int ptp_kvm_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { return -EOPNOTSUPP; } static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { long ret; struct timespec64 tspec; spin_lock(&kvm_ptp_lock); ret = kvm_arch_ptp_get_clock(&tspec); if (ret) { spin_unlock(&kvm_ptp_lock); return ret; } spin_unlock(&kvm_ptp_lock); memcpy(ts, &tspec, sizeof(struct timespec64)); return 0; } static int ptp_kvm_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } static const struct ptp_clock_info ptp_kvm_caps = { .owner = THIS_MODULE, .name = "KVM virtual PTP", .max_adj = 0, .n_ext_ts = 0, .n_pins = 0, .pps = 0, .adjfine = ptp_kvm_adjfine, .adjtime = ptp_kvm_adjtime, .gettime64 = ptp_kvm_gettime, .settime64 = ptp_kvm_settime, .enable = ptp_kvm_enable, .getcrosststamp = ptp_kvm_getcrosststamp, }; /* module operations */ static struct kvm_ptp_clock kvm_ptp_clock; static void __exit ptp_kvm_exit(void) { ptp_clock_unregister(kvm_ptp_clock.ptp_clock); kvm_arch_ptp_exit(); } static int __init ptp_kvm_init(void) { long ret; ret = kvm_arch_ptp_init(); if (ret) { if (ret != -EOPNOTSUPP) pr_err("fail to initialize ptp_kvm"); return ret; } kvm_ptp_clock.caps = ptp_kvm_caps; kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock); } module_init(ptp_kvm_init); module_exit(ptp_kvm_exit); MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>");
MODULE_DESCRIPTION("PTP clock using KVMCLOCK"); MODULE_LICENSE("GPL");
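The registered clock is exposed to user space as a /dev/ptpN character device, and ptp_kvm_gettime() above is what ultimately services clock_gettime() on the corresponding dynamic clock id. A hedged sketch using the fd-to-clockid convention from the kernel's PTP documentation; that the KVM clock happens to be /dev/ptp0 is an assumption made for illustration:

#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Dynamic POSIX clock ids encode the open file descriptor; see the kernel's
 * PTP documentation and test tools. /dev/ptp0 is an assumption here. */
#define CLOCKFD 3
#define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | CLOCKFD)

int main(void)
{
	struct timespec ts;
	int fd = open("/dev/ptp0", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/ptp0");
		return 1;
	}
	if (clock_gettime(FD_TO_CLOCKID(fd), &ts)) {
		perror("clock_gettime");
		close(fd);
		return 1;
	}
	printf("kvm ptp time: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	close(fd);
	return 0;
}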
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux network device link state notification * * Author: * Stefan Rompf <sux@loplof.de> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/types.h> #include "dev.h" enum lw_bits { LW_URGENT = 0, }; static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; /* Some uppers (DSA) have additional sources for being down, so * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { struct net_device *peer; int iflink; /* If called from netdev_run_todo()/linkwatch_sync_dev(), * dev_net(dev) can be already freed, and RTNL is not held. */ if (dev->reg_state <= NETREG_REGISTERED) iflink = dev_get_iflink(dev); else iflink = dev->ifindex; if (iflink == dev->ifindex) return IF_OPER_DOWN; ASSERT_RTNL(); peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; return netif_carrier_ok(peer) ?
IF_OPER_DOWN : IF_OPER_LOWERLAYERDOWN; } if (netif_dormant(dev)) return IF_OPER_DORMANT; return IF_OPER_UP; } static void rfc2863_policy(struct net_device *dev) { unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) operstate = IF_OPER_TESTING; break; case IF_LINK_MODE_DORMANT: if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; break; case IF_LINK_MODE_DEFAULT: default: break; } WRITE_ONCE(dev->operstate, operstate); } void linkwatch_init_dev(struct net_device *dev) { /* Handle pre-registration link state changes */ if (!netif_carrier_ok(dev) || netif_dormant(dev) || netif_testing(dev)) rfc2863_policy(dev); } static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) return false; if (dev->ifindex != dev_get_iflink(dev)) return true; if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } static void linkwatch_add_event(struct net_device *dev) { unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } static void linkwatch_schedule_work(int urgent) { unsigned long delay = linkwatch_nextevent - jiffies; if (test_bit(LW_URGENT, &linkwatch_flags)) return; /* Minimise down-time: drop delay for up event. */ if (urgent) { if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) return; delay = 0; } /* If we wrap around we'll delay it by at most HZ. */ if (delay > HZ) delay = 0; /* * If urgent, schedule immediate execution; otherwise, don't * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } static void linkwatch_do_dev(struct net_device *dev) { /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else dev_deactivate(dev); netdev_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). */ __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) { #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; /* Use a local list here since we add non-urgent * events back to the global one when called with * urgent_only=1. */ LIST_HEAD(wrk); /* Give urgent case more budget */ if (urgent_only) do_dev += MAX_DO_DEV_PER_LOOP; /* * Limit the number of linkwatch events to one * per second so that a runaway driver does not * cause a storm of messages on the netlink * socket. This limit does not apply to up events * while the device qdisc is down. */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; /* Limit wrap-around effect on delay. 
*/ else if (time_after(linkwatch_nextevent, jiffies + HZ)) linkwatch_nextevent = jiffies; clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); if (!netif_device_present(dev) || (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } /* We must free netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); linkwatch_do_dev(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } /* Add the remaining work back to lweventlist */ list_splice_init(&wrk, &lweventlist); if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); } void linkwatch_sync_dev(struct net_device *dev) { unsigned long flags; int clean = 0; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = 1; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); if (clean) linkwatch_do_dev(dev); } /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { __linkwatch_run_queue(0); } static void linkwatch_event(struct work_struct *dummy) { rtnl_lock(); __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { linkwatch_add_event(dev); } else if (!urgent) return; linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event);
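The operstate value maintained by rfc2863_policy() is what netlink and sysfs report for an interface, so the effect of this file can be observed by reading /sys/class/net/<ifname>/operstate. A minimal user-space sketch (the default interface name is only an example):

#include <stdio.h>

/* Print the RFC 2863 operational state that rfc2863_policy() computed. */
int main(int argc, char **argv)
{
	const char *ifname = argc > 1 ? argv[1] : "eth0";	/* example name */
	char path[128], state[32];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/net/%s/operstate", ifname);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(state, sizeof(state), f))
		printf("%s: %s", ifname, state);	/* e.g. "up", "down", "dormant" */
	fclose(f);
	return 0;
}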
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ #include <linux/ctype.h> #include <linux/list.h> #include <linux/sched.h> #include <drm/drm_fourcc.h> #include <drm/drm_mode_object.h> struct drm_clip_rect; struct drm_device; struct drm_file; struct drm_framebuffer; struct drm_gem_object; /** * struct drm_framebuffer_funcs - framebuffer hooks */ struct drm_framebuffer_funcs { /** * @destroy: * * Clean up framebuffer resources, specifically also unreference the * backing storage. The core guarantees to call this function for every * framebuffer successfully created by calling * &drm_mode_config_funcs.fb_create. Drivers must also call * drm_framebuffer_cleanup() to release DRM core resources for this * framebuffer. */ void (*destroy)(struct drm_framebuffer *framebuffer); /** * @create_handle: * * Create a buffer handle in the driver-specific buffer manager (either * GEM or TTM) valid for the passed-in &struct drm_file. This is used by * the core to implement the GETFB IOCTL, which returns (for * sufficiently privileged user) also a native buffer handle.
This can * be used for seamless transitions between modesetting clients by * copying the current screen contents to a private buffer and blending * between that and the new contents. * * GEM based drivers should call drm_gem_handle_create() to create the * handle. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*create_handle)(struct drm_framebuffer *fb, struct drm_file *file_priv, unsigned int *handle); /** * @dirty: * * Optional callback for the dirty fb IOCTL. * * Userspace can notify the driver via this callback that an area of the * framebuffer has changed and should be flushed to the display * hardware. This can also be used internally, e.g. by the fbdev * emulation, though that's not the case currently. * * See documentation in drm_mode.h for the struct drm_mode_fb_dirty_cmd * for more information as all the semantics and arguments have a one to * one mapping on this function. * * Atomic drivers should use drm_atomic_helper_dirtyfb() to implement * this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*dirty)(struct drm_framebuffer *framebuffer, struct drm_file *file_priv, unsigned flags, unsigned color, struct drm_clip_rect *clips, unsigned num_clips); }; /** * struct drm_framebuffer - frame buffer object * * Note that the fb is refcounted for the benefit of driver internals, * for example some hw, disabling a CRTC/plane is asynchronous, and * scanout does not actually complete until the next vblank. So some * cleanup (like releasing the reference(s) on the backing GEM bo(s)) * should be deferred. In cases like this, the driver would like to * hold a ref to the fb even though it has already been removed from * userspace perspective. See drm_framebuffer_get() and * drm_framebuffer_put(). * * The refcount is stored inside the mode object @base. */ struct drm_framebuffer { /** * @dev: DRM device this framebuffer belongs to */ struct drm_device *dev; /** * @head: Place on the &drm_mode_config.fb_list, access protected by * &drm_mode_config.fb_lock. */ struct list_head head; /** * @base: base modeset object structure, contains the reference count. */ struct drm_mode_object base; /** * @comm: Name of the process allocating the fb, used for fb dumping. */ char comm[TASK_COMM_LEN]; /** * @format: framebuffer format information */ const struct drm_format_info *format; /** * @funcs: framebuffer vfunc table */ const struct drm_framebuffer_funcs *funcs; /** * @pitches: Line stride per buffer. For userspace created object this * is copied from drm_mode_fb_cmd2. */ unsigned int pitches[DRM_FORMAT_MAX_PLANES]; /** * @offsets: Offset from buffer start to the actual pixel data in bytes, * per buffer. For userspace created object this is copied from * drm_mode_fb_cmd2. * * Note that this is a linear offset and does not take into account * tiling or buffer layout per @modifier. It is meant to be used when * the actual pixel data for this framebuffer plane starts at an offset, * e.g. when multiple planes are allocated within the same backing * storage buffer object. For tiled layouts this generally means its * @offsets must at least be tile-size aligned, but hardware often has * stricter requirements. * * This should not be used to specifiy x/y pixel offsets into the buffer * data (even for linear buffers). Specifying an x/y pixel offset is * instead done through the source rectangle in &struct drm_plane_state. */ unsigned int offsets[DRM_FORMAT_MAX_PLANES]; /** * @modifier: Data layout modifier. 
This is used to describe * tiling, or also special layouts (like compression) of auxiliary * buffers. For userspace created object this is copied from * drm_mode_fb_cmd2. */ uint64_t modifier; /** * @width: Logical width of the visible area of the framebuffer, in * pixels. */ unsigned int width; /** * @height: Logical height of the visible area of the framebuffer, in * pixels. */ unsigned int height; /** * @flags: Framebuffer flags like DRM_MODE_FB_INTERLACED or * DRM_MODE_FB_MODIFIERS. */ int flags; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; /** * @obj: GEM objects backing the framebuffer, one per plane (optional). * * This is used by the GEM framebuffer helpers, see e.g. * drm_gem_fb_create(). */ struct drm_gem_object *obj[DRM_FORMAT_MAX_PLANES]; }; #define obj_to_fb(x) container_of(x, struct drm_framebuffer, base) int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer *drm_framebuffer_lookup(struct drm_device *dev, struct drm_file *file_priv, uint32_t id); void drm_framebuffer_remove(struct drm_framebuffer *fb); void drm_framebuffer_cleanup(struct drm_framebuffer *fb); void drm_framebuffer_unregister_private(struct drm_framebuffer *fb); /** * drm_framebuffer_get - acquire a framebuffer reference * @fb: DRM framebuffer * * This function increments the framebuffer's reference count. */ static inline void drm_framebuffer_get(struct drm_framebuffer *fb) { drm_mode_object_get(&fb->base); } /** * drm_framebuffer_put - release a framebuffer reference * @fb: DRM framebuffer * * This function decrements the framebuffer's reference count and frees the * framebuffer if the reference count drops to zero. */ static inline void drm_framebuffer_put(struct drm_framebuffer *fb) { drm_mode_object_put(&fb->base); } /** * drm_framebuffer_read_refcount - read the framebuffer reference count. * @fb: framebuffer * * This functions returns the framebuffer's reference count. */ static inline uint32_t drm_framebuffer_read_refcount(const struct drm_framebuffer *fb) { return kref_read(&fb->base.refcount); } /** * drm_framebuffer_assign - store a reference to the fb * @p: location to store framebuffer * @fb: new framebuffer (maybe NULL) * * This functions sets the location to store a reference to the framebuffer, * unreferencing the framebuffer that was previously stored in that location. */ static inline void drm_framebuffer_assign(struct drm_framebuffer **p, struct drm_framebuffer *fb) { if (fb) drm_framebuffer_get(fb); if (*p) drm_framebuffer_put(*p); *p = fb; } /* * drm_for_each_fb - iterate over all framebuffers * @fb: the loop cursor * @dev: the DRM device * * Iterate over all framebuffers of @dev. User must hold * &drm_mode_config.fb_lock. */ #define drm_for_each_fb(fb, dev) \ for (WARN_ON(!mutex_is_locked(&(dev)->mode_config.fb_lock)), \ fb = list_first_entry(&(dev)->mode_config.fb_list, \ struct drm_framebuffer, head); \ &fb->head != (&(dev)->mode_config.fb_list); \ fb = list_next_entry(fb, head)) /** * struct drm_afbc_framebuffer - a special afbc frame buffer object * * A derived class of struct drm_framebuffer, dedicated for afbc use cases. */ struct drm_afbc_framebuffer { /** * @base: base framebuffer structure. 
*/ struct drm_framebuffer base; /** * @block_width: width of a single afbc block */ u32 block_width; /** * @block_height: height of a single afbc block */ u32 block_height; /** * @aligned_width: aligned frame buffer width */ u32 aligned_width; /** * @aligned_height: aligned frame buffer height */ u32 aligned_height; /** * @offset: offset of the first afbc header */ u32 offset; /** * @afbc_size: minimum size of afbc buffer */ u32 afbc_size; }; #define fb_to_afbc_fb(x) container_of(x, struct drm_afbc_framebuffer, base) #endif
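Tying the kerneldoc above together: a driver's @destroy hook must drop its backing-storage references and call drm_framebuffer_cleanup(), and a GEM-based @create_handle normally just forwards to drm_gem_handle_create(). A sketch under those assumptions; the foo_fb wrapper and function names are hypothetical and not taken from any real driver:

#include <linux/container_of.h>
#include <linux/slab.h>

#include <drm/drm_framebuffer.h>
#include <drm/drm_gem.h>

/* Hypothetical wrapper; drivers commonly embed drm_framebuffer like this. */
struct foo_fb {
	struct drm_framebuffer base;
	struct drm_gem_object *obj;
};

static void foo_fb_destroy(struct drm_framebuffer *fb)
{
	struct foo_fb *ffb = container_of(fb, struct foo_fb, base);

	drm_gem_object_put(ffb->obj);		/* drop the backing storage reference */
	drm_framebuffer_cleanup(fb);		/* release DRM core resources */
	kfree(ffb);
}

static int foo_fb_create_handle(struct drm_framebuffer *fb,
				struct drm_file *file_priv,
				unsigned int *handle)
{
	struct foo_fb *ffb = container_of(fb, struct foo_fb, base);

	return drm_gem_handle_create(file_priv, ffb->obj, handle);
}

static const struct drm_framebuffer_funcs foo_fb_funcs = {
	.destroy	= foo_fb_destroy,
	.create_handle	= foo_fb_create_handle,
};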
// SPDX-License-Identifier: GPL-2.0-only /* * mac80211 configuration hooks for cfg80211 * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <linux/rcupdate.h> #include <linux/fips.h> #include <linux/if_ether.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "mesh.h" #include "wme.h" static struct ieee80211_link_data * ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id, bool require_valid) { struct ieee80211_link_data *link; if (link_id < 0) { /* * For keys, if sdata is not an MLD, we might not use * the return value at all (if it's not a pairwise key), * so in that case (require_valid==false) don't error.
*/ if (require_valid && ieee80211_vif_is_mld(&sdata->vif)) return ERR_PTR(-EINVAL); return &sdata->deflink; } link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return ERR_PTR(-ENOLINK); return link; } static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { bool mu_mimo_groups = false; bool mu_mimo_follow = false; if (params->vht_mumimo_groups) { u64 membership; BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); memcpy(sdata->vif.bss_conf.mu_group.position, params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MU_GROUPS); /* don't care about endianness - just check for 0 */ memcpy(&membership, params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); mu_mimo_groups = membership != 0; } if (params->vht_mumimo_follow_addr) { mu_mimo_follow = is_valid_ether_addr(params->vht_mumimo_follow_addr); ether_addr_copy(sdata->u.mntr.mu_follow_addr, params->vht_mumimo_follow_addr); } sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; } static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, struct vif_params *params) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *monitor_sdata; /* check flags first */ if (params->flags && ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; /* * Prohibit MONITOR_FLAG_COOK_FRAMES and * MONITOR_FLAG_ACTIVE to be changed while the * interface is up. * Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; } /* also validate MU-MIMO change */ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) monitor_sdata = sdata; else monitor_sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) return -EOPNOTSUPP; /* apply all changes now - no failures allowed */ if (monitor_sdata && (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { if (ieee80211_sdata_running(sdata)) { ieee80211_adjust_monitor_flags(sdata, -1); sdata->u.mntr.flags = params->flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); } else { /* * Because the interface is down, ieee80211_do_stop * and ieee80211_do_open take care of "everything" * mentioned in the comment above. 
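		 * The new flags are therefore only stored here and take
		 * effect the next time the interface is brought up.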
*/ sdata->u.mntr.flags = params->flags; } } return 0; } static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, struct cfg80211_mbssid_config *params, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params->tx_wdev) return -EINVAL; tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params->tx_wdev); if (!tx_sdata) return -EINVAL; if (tx_sdata == sdata) { sdata->vif.mbssid_tx_vif = &sdata->vif; } else { sdata->vif.mbssid_tx_vif = &tx_sdata->vif; link_conf->nontransmitted = true; link_conf->bssid_index = params->index; } if (params->ema) link_conf->ema_ap = true; return 0; } static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params); if (err) return ERR_PTR(err); sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (type == NL80211_IFTYPE_MONITOR) { err = ieee80211_set_mon_options(sdata, params); if (err) { ieee80211_if_remove(sdata); return NULL; } } /* Let the driver know that an interface is going to be added. * Indicate so only for interface types that will be added to the * driver. */ switch (type) { case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_MONITOR: if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || !(params->flags & MONITOR_FLAG_ACTIVE)) break; fallthrough; default: drv_prep_add_interface(local, ieee80211_vif_type_p2p(&sdata->vif)); break; } return wdev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } static int ieee80211_change_iface(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) { RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (params->use_4addr == ifmgd->use_4addr) return 0; /* FIXME: no support for 4-addr MLO yet */ if (ieee80211_vif_is_mld(&sdata->vif)) return -EOPNOTSUPP; sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); if (params->use_4addr) ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { ret = ieee80211_set_mon_options(sdata, params); if (ret) return ret; } return 0; } static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; return ieee80211_do_open(wdev, true); } static 
void ieee80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } static int ieee80211_start_nan(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; ret = ieee80211_do_open(wdev, true); if (ret) return ret; ret = drv_start_nan(sdata->local, sdata, conf); if (ret) ieee80211_sdata_stop(sdata); sdata->u.nan.conf = *conf; return ret; } static void ieee80211_stop_nan(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); drv_stop_nan(sdata->local, sdata); ieee80211_sdata_stop(sdata); } static int ieee80211_nan_change_conf(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_conf new_conf; int ret = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; new_conf = sdata->u.nan.conf; if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) sdata->u.nan.conf = new_conf; return ret; } static int ieee80211_add_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; if (sdata->vif.type != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; spin_lock_bh(&sdata->u.nan.func_lock); ret = idr_alloc(&sdata->u.nan.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); spin_unlock_bh(&sdata->u.nan.func_lock); if (ret < 0) return ret; nan_func->instance_id = ret; WARN_ON(nan_func->instance_id == 0); ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { spin_lock_bh(&sdata->u.nan.func_lock); idr_remove(&sdata->u.nan.function_inst_ids, nan_func->instance_id); spin_unlock_bh(&sdata->u.nan.func_lock); } return ret; } static struct cfg80211_nan_func * ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, u64 cookie) { struct cfg80211_nan_func *func; int id; lockdep_assert_held(&sdata->u.nan.func_lock); idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } return NULL; } static void ieee80211_del_nan_func(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct cfg80211_nan_func *func; u8 instance_id = 0; if (sdata->vif.type != NL80211_IFTYPE_NAN || !ieee80211_sdata_running(sdata)) return; spin_lock_bh(&sdata->u.nan.func_lock); func = ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; spin_unlock_bh(&sdata->u.nan.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); } static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; ieee80211_check_fast_xmit_iface(sdata); return 0; } static int ieee80211_set_tx(struct 
ieee80211_sub_if_data *sdata, const u8 *mac_addr, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; struct sta_info *sta; int ret = -EINVAL; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) return -EINVAL; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return -EINVAL; if (sta->ptk_idx == key_idx) return 0; key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); return ret; } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; if (IS_ERR(link)) return PTR_ERR(link); if (WARN_ON(pairwise && link_id >= 0)) return -EINVAL; if (pairwise && params->mode == NL80211_KEY_SET_TX) return ieee80211_set_tx(sdata, mac_addr, key_idx); /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: if (link_id >= 0) return -EINVAL; if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; default: break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); if (pairwise) { key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; key->conf.link_id = -1; } else { key->conf.link_id = link->link_id; } if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* * The ASSOC test makes sure the driver is ready to * receive the key. When wpa_supplicant has roamed * using FT, it attempts to set the key before * association has completed, this rejects that attempt * so it will set the key again after association. * * TODO: accept the key if we have a station entry and * add it to the device after the station. 
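		 * Until then, the -ENOENT below frees the unused key and
		 * lets the supplicant install it again once the station
		 * has reached the ASSOC state.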
*/ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); return -ENOENT; } } switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: /* no MFP (yet) */ break; case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; #endif case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: /* shouldn't happen */ WARN_ON_ONCE(1); break; } err = ieee80211_key_link(key, link, sta); /* KRACK protection, shouldn't happen but just silently accept key */ if (err == -EALREADY) err = 0; return err; } static struct ieee80211_key * ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_local *local __maybe_unused = sdata->local; struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_key *key; if (link_id >= 0) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } if (mac_addr) { struct sta_info *sta; struct link_sta_info *link_sta; sta = sta_info_get_bss(sdata, mac_addr); if (!sta) return NULL; if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { link_sta = &sta->deflink; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) return wiphy_dereference(local->hw.wiphy, link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; lockdep_assert_wiphy(local->hw.wiphy); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *params)) { struct ieee80211_sub_if_data *sdata; u8 seq[6] = {0}; struct key_params params; struct ieee80211_key *key; u64 pn64; u32 iv32; u16 iv16; int err = -ENOENT; struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); if (!key) goto out; memset(&params, 0, 
sizeof(params)); params.cipher = key->conf.cipher; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: pn64 = atomic64_read(&key->conf.tx_pn); iv32 = TKIP_PN_TO_IV32(pn64); iv16 = TKIP_PN_TO_IV16(pn64); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); iv32 = kseq.tkip.iv32; iv16 = kseq.tkip.iv16; } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; seq[2] = iv32 & 0xff; seq[3] = (iv32 >> 8) & 0xff; seq[4] = (iv32 >> 16) & 0xff; seq[5] = (iv32 >> 24) & 0xff; params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); fallthrough; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); fallthrough; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), gcmp)); if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { drv_get_key_seq(sdata->local, key, &kseq); memcpy(seq, kseq.ccmp.pn, 6); } else { pn64 = atomic64_read(&key->conf.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; seq[2] = pn64 >> 16; seq[3] = pn64 >> 24; seq[4] = pn64 >> 32; seq[5] = pn64 >> 40; } params.seq = seq; params.seq_len = 6; break; default: if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) break; if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) break; drv_get_key_seq(sdata->local, key, &kseq); params.seq = kseq.hw.seq; params.seq_len = kseq.hw.seq_len; break; } callback(cookie, &params); err = 0; out: rcu_read_unlock(); return err; } static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx, bool uni, bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_key(link, key_idx, uni, multi); return 0; } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_mgmt_key(link, key_idx); return 0; } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, struct net_device *dev, int link_id, u8 key_idx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); if (IS_ERR(link)) return PTR_ERR(link); ieee80211_set_default_beacon_key(link, key_idx); return 0; } void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo) { rinfo->flags = 0; if (rate->flags & IEEE80211_TX_RC_MCS) { rinfo->flags |= RATE_INFO_FLAGS_MCS; rinfo->mcs = rate->idx; } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = ieee80211_rate_get_vht_mcs(rate); rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); if (sband && 
sband->bitrates) rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_160; else rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_by_idx(sdata, idx); if (sta) { ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, int idx, struct survey_info *survey) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); return drv_get_survey(local, idx, survey); } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { ret = 0; sta_set_sinfo(sta, sinfo, true); } return ret; } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; lockdep_assert_wiphy(local->hw.wiphy); sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, &chanreq.oper)) return 0; sdata = wiphy_dereference(wiphy, local->monitor_sdata); if (!sdata) goto done; } if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) && cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (ret) return ret; done: local->monitor_chanreq = chanreq; return 0; } static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, struct ieee80211_link_data *link) { struct probe_resp *new, *old; if (!resp || !resp_len) return 1; old = sdata_dereference(link->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = resp_len; memcpy(new->data, resp, resp_len); if (csa) memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); else if (cca) new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(link->u.ap.probe_resp, new); if (old) kfree_rcu(old, rcu_head); return 0; } static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct cfg80211_fils_discovery *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct fils_discovery_data *new, *old = 
NULL; struct ieee80211_fils_discovery *fd; if (!params->update) return 0; fd = &link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.fils_discovery, new); } else { RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); } *changed |= BSS_CHANGED_FILS_DISCOVERY; return 0; } static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf, u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; if (!params->update) return 0; link_conf->unsol_bcast_probe_resp_interval = params->interval; old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); if (params->tmpl && params->tmpl_len) { new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); if (!new) return -ENOMEM; new->len = params->tmpl_len; memcpy(new->data, params->tmpl, params->tmpl_len); rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); } else { RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); } *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; return 0; } static int ieee80211_set_ftm_responder_params( struct ieee80211_sub_if_data *sdata, const u8 *lci, size_t lci_len, const u8 *civicloc, size_t civicloc_len, struct ieee80211_bss_conf *link_conf) { struct ieee80211_ftm_responder_params *new, *old; u8 *pos; int len; if (!lci_len && !civicloc_len) return 0; old = link_conf->ftmr_params; len = lci_len + civicloc_len; new = kzalloc(sizeof(*new) + len, GFP_KERNEL); if (!new) return -ENOMEM; pos = (u8 *)(new + 1); if (lci_len) { new->lci_len = lci_len; new->lci = pos; memcpy(pos, lci, lci_len); pos += lci_len; } if (civicloc_len) { new->civicloc_len = civicloc_len; new->civicloc = pos; memcpy(pos, civicloc, civicloc_len); pos += civicloc_len; } link_conf->ftmr_params = new; kfree(old); return 0; } static int ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, struct cfg80211_mbssid_elems *src) { int i, offset = 0; dst->cnt = src->cnt; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } return offset; } static int ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, struct cfg80211_rnr_elems *src) { int i, offset = 0; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } dst->cnt = src->cnt; return offset; } static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_beacon_data *params, const struct ieee80211_csa_settings *csa, const struct ieee80211_color_change_settings *cca, u64 *changed) { struct cfg80211_mbssid_elems *mbssid = NULL; struct cfg80211_rnr_elems *rnr = NULL; struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u64 _changed = BSS_CHANGED_BEACON; struct ieee80211_bss_conf *link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, 
sdata); /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) return -EINVAL; /* new or old head? */ if (params->head) new_head_len = params->head_len; else new_head_len = old->head_len; /* new or old tail? */ if (params->tail || !old) /* params->tail_len will be zero for !params->tail */ new_tail_len = params->tail_len; else new_tail_len = old->tail_len; size = sizeof(*new) + new_head_len + new_tail_len; /* new or old multiple BSSID elements? */ if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (params->rnr_ies) { rnr = params->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } else if (old && old->mbssid_ies) { mbssid = old->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); if (old && old->rnr_ies) { rnr = old->rnr_ies; size += struct_size(new->rnr_ies, elem, rnr->cnt); } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); if (!new) return -ENOMEM; /* start filling the new info now */ /* * pointers go into the block we allocated, * memory is | beacon_data | head | tail | mbssid_ies | rnr_ies */ new->head = ((u8 *) new) + sizeof(*new); new->tail = new->head + new_head_len; new->head_len = new_head_len; new->tail_len = new_tail_len; /* copy in optional mbssid_ies */ if (mbssid) { u8 *pos = new->tail + new->tail_len; new->mbssid_ies = (void *)pos; pos += struct_size(new->mbssid_ies, elem, mbssid->cnt); pos += ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid); if (rnr) { new->rnr_ies = (void *)pos; pos += struct_size(new->rnr_ies, elem, rnr->cnt); ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); } /* update bssid_indicator */ link_conf->bssid_indicator = ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); } if (csa) { new->cntdwn_current_counter = csa->count; memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); } else if (cca) { new->cntdwn_current_counter = cca->count; new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ if (params->head) memcpy(new->head, params->head, new_head_len); else memcpy(new->head, old->head, new_head_len); /* copy in optional tail */ if (params->tail) memcpy(new->tail, params->tail, new_tail_len); else if (old) memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len, csa, cca, link); if (err < 0) { kfree(new); return err; } if (err == 0) _changed |= BSS_CHANGED_AP_PROBE_RESP; if (params->ftm_responder != -1) { link_conf->ftm_responder = params->ftm_responder; err = ieee80211_set_ftm_responder_params(sdata, params->lci, params->lci_len, params->civicloc, params->civicloc_len, link_conf); if (err < 0) { kfree(new); return err; } _changed |= BSS_CHANGED_FTM_RESPONDER; } rcu_assign_pointer(link->u.ap.beacon, new); sdata->u.ap.active = true; if (old) kfree_rcu(old, rcu_head); *changed |= _changed; return 0; } static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata) { struct ieee80211_link_data *link; u8 link_id, num = 0; if (sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_P2P_GO) return num; if (!sdata->vif.valid_links) return num; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; if 
(sdata_dereference(link->u.ap.beacon, sdata)) num++; } return num; } static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u64 changed = BSS_CHANGED_BEACON_INT | BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | BSS_CHANGED_TWT; int i, err; int prev_beacon_int; unsigned int link_id = params->beacon.link_id; struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; struct ieee80211_chan_req chanreq = { .oper = params->chandef }; lockdep_assert_wiphy(local->hw.wiphy); link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; old = sdata_dereference(link->u.ap.beacon, sdata); if (old) return -EALREADY; link->smps_mode = IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; prev_beacon_int = link_conf->beacon_int; link_conf->beacon_int = params->beacon_interval; if (params->ht_cap) link_conf->ht_ldpc = params->ht_cap->cap_info & cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING); if (params->vht_cap) { link_conf->vht_ldpc = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC); link_conf->vht_su_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); link_conf->vht_su_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); link_conf->vht_mu_beamformer = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE); link_conf->vht_mu_beamformee = params->vht_cap->vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); } if (params->he_cap && params->he_oper) { link_conf->he_support = true; link_conf->htc_trig_based_pkt_ext = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK); link_conf->frame_time_rts_th = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; if (params->beacon.he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } if (params->he_cap) { link_conf->he_ldpc = params->he_cap->phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD; link_conf->he_su_beamformer = params->he_cap->phy_cap_info[3] & IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER; link_conf->he_su_beamformee = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; link_conf->he_mu_beamformer = params->he_cap->phy_cap_info[4] & IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER; link_conf->he_full_ul_mumimo = params->he_cap->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO; } if (params->eht_cap) { if (!link_conf->he_support) return -EOPNOTSUPP; link_conf->eht_support = true; link_conf->eht_su_beamformer = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER; link_conf->eht_su_beamformee = params->eht_cap->fixed.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE; link_conf->eht_mu_beamformer = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); link_conf->eht_80mhz_full_bw_ul_mumimo = params->eht_cap->fixed.phy_cap_info[7] & (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | 
IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; link_conf->eht_mu_beamformer = false; } if (sdata->vif.type == NL80211_IFTYPE_AP && params->mbssid_config.tx_wdev) { err = ieee80211_set_ap_mbssid_options(sdata, &params->mbssid_config, link_conf); if (err) return err; } err = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); if (err) { link_conf->beacon_int = prev_beacon_int; return err; } /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; sdata->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = params->crypto.control_port_no_preauth; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; vlan->control_port_over_nl80211 = params->crypto.control_port_over_nl80211; vlan->control_port_no_preauth = params->crypto.control_port_no_preauth; } link_conf->dtim_period = params->dtim_period; link_conf->enable_beacon = true; link_conf->allow_p2p_go_ps = sdata->vif.p2p; link_conf->twt_responder = params->twt_responder; link_conf->he_obss_pd = params->he_obss_pd; link_conf->he_bss_color = params->beacon.he_bss_color; sdata->vif.cfg.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ; sdata->vif.cfg.ssid_len = params->ssid_len; if (params->ssid_len) memcpy(sdata->vif.cfg.ssid, params->ssid, params->ssid_len); link_conf->hidden_ssid = (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); memset(&link_conf->p2p_noa_attr, 0, sizeof(link_conf->p2p_noa_attr)); link_conf->p2p_noa_attr.oppps_ctwindow = params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; if (params->p2p_opp_ps) link_conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = params->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) link_conf->beacon_tx_rate = params->beacon_rate; err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL, &changed); if (err < 0) goto error; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) goto error; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) goto error; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { old = sdata_dereference(link->u.ap.beacon, sdata); if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(link->u.ap.beacon, NULL); if (ieee80211_num_beaconing_links(sdata) == 0) sdata->u.ap.active = false; goto error; } ieee80211_recalc_dtim(local, sdata); ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID); ieee80211_link_info_change_notify(sdata, link, changed); if (ieee80211_num_beaconing_links(sdata) <= 1) netif_carrier_on(dev); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_on(vlan->dev); return 0; error: ieee80211_link_release_channel(link); return err; } 
static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_update *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct cfg80211_beacon_data *beacon = &params->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; u64 changed = 0; lockdep_assert_wiphy(wiphy); link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; link_conf = link->conf; /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ if (link_conf->csa_active || link_conf->color_change_active) return -EBUSY; old = sdata_dereference(link->u.ap.beacon, sdata); if (!old) return -ENOENT; err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, link, link_conf, &changed); if (err < 0) return err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, &params->unsol_bcast_probe_resp, link, link_conf, &changed); if (err < 0) return err; if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static void ieee80211_free_next_beacon(struct ieee80211_link_data *link) { if (!link->u.ap.next_beacon) return; kfree(link->u.ap.next_beacon->mbssid_ies); kfree(link->u.ap.next_beacon->rnr_ies); kfree(link->u.ap.next_beacon); link->u.ap.next_beacon = NULL; } static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; struct fils_discovery_data *old_fils_discovery; struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp; struct cfg80211_chan_def chandef; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; LIST_HEAD(keys); lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; old_probe_resp = sdata_dereference(link->u.ap.probe_resp, sdata); old_fils_discovery = sdata_dereference(link->u.ap.fils_discovery, sdata); old_unsol_bcast_probe_resp = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; ieee80211_vif_unblock_queues_csa(sdata); ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); if (ieee80211_num_beaconing_links(sdata) <= 1) { netif_carrier_off(dev); sdata->u.ap.active = false; } /* remove beacon and probe response */ RCU_INIT_POINTER(link->u.ap.beacon, NULL); RCU_INIT_POINTER(link->u.ap.probe_resp, NULL); RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); if (old_fils_discovery) kfree_rcu(old_fils_discovery, rcu_head); if (old_unsol_bcast_probe_resp) kfree_rcu(old_unsol_bcast_probe_resp, 
rcu_head); kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; sdata->vif.mbssid_tx_vif = NULL; link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; __sta_info_flush(sdata, true, link_id, NULL); ieee80211_remove_link_keys(link, &keys); if (!list_empty(&keys)) { synchronize_net(); ieee80211_free_key_list(local, &keys); } link_conf->enable_beacon = false; sdata->beacon_rate_set = false; sdata->vif.cfg.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.links[link_id].cac_started) { chandef = link_conf->chanreq.oper; wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL, link_id); } drv_stop_ap(sdata->local, sdata, link_conf); /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); return 0; } static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) { int ret; if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && set & BIT(NL80211_STA_FLAG_ASSOCIATED) && !test_sta_flag(sta, WLAN_STA_ASSOC)) { /* * When peer becomes associated, init rate control as * well. Some drivers require rate control initialized * before drv_sta_state() is called. 
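		 * Hence rate_control_rate_init_all_links() runs below,
		 * before sta_info_move_state() advances the station to
		 * IEEE80211_STA_ASSOC.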
*/ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init_all_links(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); else ret = 0; if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && test_sta_flag(sta, WLAN_STA_ASSOC)) { ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && test_sta_flag(sta, WLAN_STA_AUTH)) { ret = sta_info_move_state(sta, IEEE80211_STA_NONE); if (ret) return ret; } return 0; } static void sta_apply_mesh_params(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { #ifdef CONFIG_MAC80211_MESH struct ieee80211_sub_if_data *sdata = sta->sdata; u64 changed = 0; if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) changed = mesh_plink_inc_estab_count(sdata); sta->mesh->plink_state = params->plink_state; sta->mesh->aid = params->peer_aid; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, sdata->u.mesh.mshcfg.power_mode); ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg); /* init at low value */ ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10); break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: case NL80211_PLINK_OPN_SNT: case NL80211_PLINK_OPN_RCVD: case NL80211_PLINK_CNF_RCVD: case NL80211_PLINK_HOLDING: if (sta->mesh->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->mesh->plink_state = params->plink_state; ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, NL80211_MESH_POWER_UNKNOWN); break; default: /* nothing */ break; } } switch (params->plink_action) { case NL80211_PLINK_ACTION_NO_ACTION: /* nothing */ break; case NL80211_PLINK_ACTION_OPEN: changed |= mesh_plink_open(sta); break; case NL80211_PLINK_ACTION_BLOCK: changed |= mesh_plink_block(sta); break; } if (params->local_pm) changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); ieee80211_mbss_info_change_notify(sdata, changed); #endif } enum sta_link_apply_mode { STA_LINK_MODE_NEW, STA_LINK_MODE_STA_MODIFY, STA_LINK_MODE_LINK_MODIFY, }; static int sta_link_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, enum sta_link_apply_mode mode, struct link_station_parameters *params) { struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 
0 : params->link_id; struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); bool changes = params->link_mac || params->txpwr_set || params->supported_rates_len || params->ht_capa || params->vht_capa || params->he_capa || params->eht_capa || params->opmode_notif_used; switch (mode) { case STA_LINK_MODE_NEW: if (!params->link_mac) return -EINVAL; break; case STA_LINK_MODE_LINK_MODIFY: break; case STA_LINK_MODE_STA_MODIFY: if (params->link_id >= 0) break; if (!changes) return 0; break; } if (!link || !link_sta) return -EINVAL; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->link_mac) { if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN); } else if (!ether_addr_equal(link_sta->addr, params->link_mac)) { return -EINVAL; } } if (params->txpwr_set) { int ret; link_sta->pub->txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) link_sta->pub->txpwr.power = params->txpwr.power; ret = drv_sta_set_txpwr(local, sdata, sta); if (ret) return ret; } if (params->supported_rates && params->supported_rates_len) { ieee80211_parse_bitrates(link->conf->chanreq.oper.width, sband, params->supported_rates, params->supported_rates_len, &link_sta->pub->supp_rates[sband->band]); } if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, NULL, link_sta); if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, (void *)params->he_capa, params->he_capa_len, (void *)params->he_6ghz_capa, link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, (u8 *)params->he_capa, params->he_capa_len, params->eht_capa, params->eht_capa_len, link_sta); ieee80211_sta_init_nss(link_sta); if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ __ieee80211_vht_handle_opmode(sdata, link_sta, params->opmode_notif, sband->band); } return 0; } static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 mask, set; int ret = 0; mask = params->sta_flags_mask; set = params->sta_flags_set; if (ieee80211_vif_is_mesh(&sdata->vif)) { /* * In mesh mode, ASSOCIATED isn't part of the nl80211 * API but must follow AUTHENTICATED for driver state. 
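		 * Mirror the AUTHENTICATED bit into ASSOCIATED in both
		 * the mask and the set below so the two transitions stay
		 * in lockstep.
		 */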
*/ if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { /* * TDLS -- everything follows authorized, but * only becoming authorized is possible, not * going back */ if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED); } } if (mask & BIT(NL80211_STA_FLAG_WME) && local->hw.queues >= IEEE80211_NUM_ACS) sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); else clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_MFP)) { sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else clear_sta_flag(sta, WLAN_STA_MFP); } if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) set_sta_flag(sta, WLAN_STA_TDLS_PEER); else clear_sta_flag(sta, WLAN_STA_TDLS_PEER); } if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU); /* mark TDLS channel switch support, if the AP allows it */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->deflink.u.mgd.tdls_chan_switch_prohibited && params->ext_capab_len >= 4 && params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH) set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->u.mgd.tdls_wider_bw_prohibited && ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && params->ext_capab_len >= 8 && params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW); if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { sta->sta.uapsd_queues = params->uapsd_queues; sta->sta.max_sp = params->max_sp; } ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, params->ext_capab_len); /* * cfg80211 validates this (1-2007) and allows setting the AID * only when creating a new station entry */ if (params->aid) sta->sta.aid = params->aid; /* * Some of the following updates would be racy if called on an * existing station, via ieee80211_change_station(). However, * all such changes are rejected by cfg80211 except for updates * changing the supported rates on an existing but not yet used * TDLS peer. 
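	 * (See cfg80211_check_station_change(), called from
	 * ieee80211_change_station() before this point.)
	 */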
*/ if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY, &params->link_sta_params); if (ret) return ret; if (params->support_p2p_ps >= 0) sta->sta.support_p2p_ps = params->support_p2p_ps; if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; } /* Mark the STA as MLO if MLD MAC address is available */ if (params->link_sta_params.mld_mac) sta->sta.mlo = true; return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; lockdep_assert_wiphy(local->hw.wiphy); if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && sdata->vif.type == NL80211_IFTYPE_STATION && !sdata->u.mgd.associated) return -EINVAL; /* * If we have a link ID, it can be a non-MLO station on an AP MLD, * but we need to have a link_mac in that case as well, so use the * STA's MAC address in that case. */ if (params->link_sta_params.link_id >= 0) sta = sta_info_alloc_with_link(sdata, mac, params->link_sta_params.link_id, params->link_sta_params.link_mac ?: mac, GFP_KERNEL); else sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; /* Though the mutex is not needed here (since the station is not * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. 
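	 * (That is satisfied here: the lockdep_assert_wiphy() at the top
	 * of this function checks that the wiphy mutex is already held.)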
*/ err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); return err; } /* * for TDLS and for unassociated station, rate control should be * initialized only when rates are known and station is marked * authorized/associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init_all_links(sta); return sta_info_insert(sta); } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); sta_info_flush(sdata, params->link_id); return 0; } static int ieee80211_change_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; enum cfg80211_station_type statype; int err; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (!sta) return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: if (sdata->u.mesh.user_mpm) statype = CFG80211_STA_MESH_PEER_USER; else statype = CFG80211_STA_MESH_PEER_KERNEL; break; case NL80211_IFTYPE_ADHOC: statype = CFG80211_STA_IBSS; break; case NL80211_IFTYPE_STATION: if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { statype = CFG80211_STA_AP_STA; break; } if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) statype = CFG80211_STA_TDLS_PEER_ACTIVE; else statype = CFG80211_STA_TDLS_PEER_SETUP; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: if (test_sta_flag(sta, WLAN_STA_ASSOC)) statype = CFG80211_STA_AP_CLIENT; else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) ieee80211_vif_dec_num_mcast(sta->sdata); sta->sdata = vlansdata; ieee80211_check_fast_rx(sta); ieee80211_check_fast_xmit(sta); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); } } err = sta_apply_parameters(local, sta, params); if (err) return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } return 0; } #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_add(sdata, dst); if (IS_ERR(mpath)) { rcu_read_unlock(); return PTR_ERR(mpath); } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 
0; } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (dst) return mesh_path_del(sdata, dst); mesh_path_flush_by_iface(sdata); return 0; } static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); sta = sta_info_get(sdata, next_hop); if (!sta) { rcu_read_unlock(); return -ENOENT; } mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } mesh_path_fix_nexthop(mpath, sta); rcu_read_unlock(); return 0; } static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { struct sta_info *next_hop_sta = rcu_dereference(mpath->next_hop); if (next_hop_sta) memcpy(next_hop, next_hop_sta->sta.addr, ETH_ALEN); else eth_zero_addr(next_hop); memset(pinfo, 0, sizeof(*pinfo)); pinfo->generation = mpath->sdata->u.mesh.mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | MPATH_INFO_DISCOVERY_RETRIES | MPATH_INFO_FLAGS | MPATH_INFO_HOP_COUNT | MPATH_INFO_PATH_CHANGE; pinfo->frame_qlen = mpath->frame_queue.qlen; pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; if (mpath->flags & MESH_PATH_SN_VALID) pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVED) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; pinfo->hop_count = mpath->hop_count; pinfo->path_change_count = mpath->path_change_count; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup(sdata, dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mesh_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpath_set_pinfo(mpath, next_hop, pinfo); rcu_read_unlock(); return 0; } static void mpp_set_pinfo(struct mesh_path *mpath, u8 *mpp, struct mpath_info *pinfo) { memset(pinfo, 0, sizeof(*pinfo)); memcpy(mpp, mpath->mpp, ETH_ALEN); pinfo->generation = mpath->sdata->u.mesh.mpp_paths_generation; } static int ieee80211_get_mpp(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup(sdata, 
dst); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_dump_mpp(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); mpath = mpp_path_lookup_by_idx(sdata, idx); if (!mpath) { rcu_read_unlock(); return -ENOENT; } memcpy(dst, mpath->dst, ETH_ALEN); mpp_set_pinfo(mpath, mpp, pinfo); rcu_read_unlock(); return 0; } static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); return 0; } static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) { return (mask >> (parm-1)) & 0x1; } static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, GFP_KERNEL); if (!new_ie) return -ENOMEM; } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; if (setup->is_secure) ifmsh->security |= IEEE80211_MESH_SEC_SECURED; /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; sdata->beacon_rate_set = false; if (wiphy_ext_feature_isset(sdata->local->hw.wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { for (i = 0; i < NUM_NL80211_BANDS; i++) { sdata->beacon_rateidx_mask[i] = setup->beacon_rate.control[i].legacy; if (sdata->beacon_rateidx_mask[i]) sdata->beacon_rate_set = true; } } return 0; } static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_mesh *ifmsh; sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) conf->dot11MeshMaxRetries = 
nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { if (ifmsh->user_mpm) return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) conf->dot11MeshHWMPmaxPREQretries = nconf->dot11MeshHWMPmaxPREQretries; if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) conf->path_refresh_time = nconf->path_refresh_time; if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) conf->min_discovery_timeout = nconf->min_discovery_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathTimeout = nconf->dot11MeshHWMPactivePathTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) conf->dot11MeshHWMPpreqMinInterval = nconf->dot11MeshHWMPpreqMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask)) conf->dot11MeshHWMPperrMinInterval = nconf->dot11MeshHWMPperrMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { /* our current gate announcement implementation rides on root * announcements, so require this ifmsh to also be a root node * */ if (nconf->dot11MeshGateAnnouncementProtocol && !(conf->dot11MeshHWMPRootMode > IEEE80211_ROOTMODE_ROOT)) { conf->dot11MeshHWMPRootMode = IEEE80211_PROACTIVE_RANN; ieee80211_mesh_root_setup(ifmsh); } conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { /* our RSSI threshold implementation is supported only for * devices that report signal in dBm. 
*/ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) return -EOPNOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) { conf->ht_opmode = nconf->ht_opmode; sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_HT); } if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) conf->dot11MeshHWMPactivePathToRootTimeout = nconf->dot11MeshHWMPactivePathToRootTimeout; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { conf->power_mode = nconf->power_mode; ieee80211_mps_local_status_update(sdata); } if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) conf->plink_timeout = nconf->plink_timeout; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask)) conf->dot11MeshConnectedToMeshGate = nconf->dot11MeshConnectedToMeshGate; if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) conf->dot11MeshNolearn = nconf->dot11MeshNolearn; if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask)) conf->dot11MeshConnectedToAuthServer = nconf->dot11MeshConnectedToAuthServer; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; lockdep_assert_wiphy(sdata->local->hw.wiphy); memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) return err; sdata->control_port_over_nl80211 = setup->control_port_over_nl80211; /* can mesh use other SMPS modes? 
*/ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; return ieee80211_start_mesh(sdata); } static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); ieee80211_stop_mesh(sdata); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); return 0; } #endif static int ieee80211_change_bss(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_supported_band *sband; u64 changed = 0; link = ieee80211_link_or_deflink(sdata, params->link_id, true); if (IS_ERR(link)) return PTR_ERR(link); if (!sdata_dereference(link->u.ap.beacon, sdata)) return -ENOENT; sband = ieee80211_get_link_sband(link); if (!sband) return -EINVAL; if (params->basic_rates) { if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, &link->conf->basic_rates)) return -EINVAL; changed |= BSS_CHANGED_BASIC_RATES; ieee80211_check_rate_mask(link); } if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { link->conf->use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (!link->conf->use_short_slot && (sband->band == NL80211_BAND_5GHZ || sband->band == NL80211_BAND_6GHZ)) { link->conf->use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } if (params->use_short_slot_time >= 0) { link->conf->use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; else sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS; ieee80211_check_fast_rx_iface(sdata); } if (params->ht_opmode >= 0) { link->conf->ht_operation_mode = (u16)params->ht_opmode; changed |= BSS_CHANGED_HT; } if (params->p2p_ctwindow >= 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; link->conf->p2p_noa_attr.oppps_ctwindow |= params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; changed |= BSS_CHANGED_P2P_PS; } if (params->p2p_opp_ps > 0) { link->conf->p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } else if (params->p2p_opp_ps == 0) { link->conf->p2p_noa_attr.oppps_ctwindow &= ~IEEE80211_P2P_OPPPS_ENABLE_BIT; changed |= BSS_CHANGED_P2P_PS; } ieee80211_link_info_change_notify(sdata, link, changed); return 0; } static int ieee80211_set_txq_params(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, params->link_id, true); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) return -EOPNOTSUPP; if (local->hw.queues < IEEE80211_NUM_ACS) return -EOPNOTSUPP; if (IS_ERR(link)) return PTR_ERR(link); memset(&p, 0, sizeof(p)); p.aifs = params->aifs; p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; /* * Setting tx queue params disables u-apsd because it's only * called 
	 * in master mode.
	 */
	p.uapsd = false;

	ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac);

	link->tx_conf[params->ac] = p;
	if (drv_conf_tx(local, link, params->ac, &p)) {
		wiphy_debug(local->hw.wiphy,
			    "failed to set TX queue parameters for AC %d\n",
			    params->ac);
		return -EINVAL;
	}

	ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_QOS);

	return 0;
}

#ifdef CONFIG_PM
static int ieee80211_suspend(struct wiphy *wiphy,
			     struct cfg80211_wowlan *wowlan)
{
	return __ieee80211_suspend(wiphy_priv(wiphy), wowlan);
}

static int ieee80211_resume(struct wiphy *wiphy)
{
	return __ieee80211_resume(wiphy_priv(wiphy));
}
#else
#define ieee80211_suspend NULL
#define ieee80211_resume NULL
#endif

static int ieee80211_scan(struct wiphy *wiphy,
			  struct cfg80211_scan_request *req)
{
	struct ieee80211_sub_if_data *sdata;

	sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev);

	switch (ieee80211_vif_type_p2p(&sdata->vif)) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_ADHOC:
	case NL80211_IFTYPE_MESH_POINT:
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_P2P_DEVICE:
		break;
	case NL80211_IFTYPE_P2P_GO:
		if (sdata->local->ops->hw_scan)
			break;
		/*
		 * FIXME: implement NoA while scanning in software,
		 * for now fall through to allow scanning only when
		 * beaconing hasn't been configured yet
		 */
		fallthrough;
	case NL80211_IFTYPE_AP:
		/*
		 * If the scan has been forced (and the driver supports
		 * forcing), don't care about already beaconing. This will
		 * create problems for the attached stations (e.g. all the
		 * frames sent while scanning on another channel will be
		 * lost).
		 */
		if (sdata->deflink.u.ap.beacon &&
		    (!(wiphy->features & NL80211_FEATURE_AP_SCAN) ||
		     !(req->flags & NL80211_SCAN_FLAG_AP)))
			return -EOPNOTSUPP;
		break;
	case NL80211_IFTYPE_NAN:
	default:
		return -EOPNOTSUPP;
	}

	return ieee80211_request_scan(sdata, req);
}

static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev)
{
	ieee80211_scan_cancel(wiphy_priv(wiphy));
}

static int ieee80211_sched_scan_start(struct wiphy *wiphy,
				      struct net_device *dev,
				      struct cfg80211_sched_scan_request *req)
{
	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);

	if (!sdata->local->ops->sched_scan_start)
		return -EOPNOTSUPP;

	return ieee80211_request_sched_scan_start(sdata, req);
}

static int ieee80211_sched_scan_stop(struct wiphy *wiphy,
				     struct net_device *dev, u64 reqid)
{
	struct ieee80211_local *local = wiphy_priv(wiphy);

	if (!local->ops->sched_scan_stop)
		return -EOPNOTSUPP;

	return ieee80211_request_sched_scan_stop(local);
}

static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
			  struct cfg80211_auth_request *req)
{
	return ieee80211_mgd_auth(IEEE80211_DEV_TO_SUB_IF(dev), req);
}

static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
			   struct cfg80211_assoc_request *req)
{
	return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req);
}

static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev,
			    struct cfg80211_deauth_request *req)
{
	return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), req);
}

static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
			      struct cfg80211_disassoc_request *req)
{
	return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), req);
}

static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev,
			       struct cfg80211_ibss_params *params)
{
	return ieee80211_ibss_join(IEEE80211_DEV_TO_SUB_IF(dev), params);
}

static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev)
{
	return ieee80211_ibss_leave(IEEE80211_DEV_TO_SUB_IF(dev));
}

static int
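/*
 * The OCB ("outside the context of a BSS", i.e. 802.11p) join/leave ops
 * that follow are thin wrappers: they simply forward the request to
 * ieee80211_ocb_join()/ieee80211_ocb_leave() for the interface.
 */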
ieee80211_join_ocb(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup) { return ieee80211_ocb_join(IEEE80211_DEV_TO_SUB_IF(dev), setup); } static int ieee80211_leave_ocb(struct wiphy *wiphy, struct net_device *dev) { return ieee80211_ocb_leave(IEEE80211_DEV_TO_SUB_IF(dev)); } static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); if (ieee80211_sdata_running(sdata)) ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_MCAST_RATE); return 0; } static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) { struct ieee80211_local *local = wiphy_priv(wiphy); int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { ieee80211_check_fast_xmit_all(local); err = drv_set_frag_threshold(local, wiphy->frag_threshold); if (err) { ieee80211_check_fast_xmit_all(local); return err; } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || (changed & WIPHY_PARAM_DYN_ACK)) { s16 coverage_class; coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ? wiphy->coverage_class : -1; err = drv_set_coverage_class(local, coverage_class); if (err) return err; } if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); if (err) return err; } if (changed & WIPHY_PARAM_RETRY_SHORT) { if (wiphy->retry_short > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; } if (changed & WIPHY_PARAM_RETRY_LONG) { if (wiphy->retry_long > IEEE80211_MAX_TX_RETRY) return -EINVAL; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; } if (changed & (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); if (changed & (WIPHY_PARAM_TXQ_LIMIT | WIPHY_PARAM_TXQ_MEMORY_LIMIT | WIPHY_PARAM_TXQ_QUANTUM)) ieee80211_txq_set_params(local); return 0; } static int ieee80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; bool has_monitor = false; int user_power_level; int old_power = local->user_power_level; lockdep_assert_wiphy(local->hw.wiphy); switch (type) { case NL80211_TX_POWER_AUTOMATIC: user_power_level = IEEE80211_UNSET_POWER_LEVEL; txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: if (mbm < 0 || (mbm % 100)) return -EOPNOTSUPP; user_power_level = MBM_TO_DBM(mbm); break; default: return -EINVAL; } if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return -EOPNOTSUPP; sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) return -EOPNOTSUPP; } for (int link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) continue; link->user_power_level = user_power_level; if (txp_type != link->conf->txpower_type) { update_txp_type = true; link->conf->txpower_type = txp_type; } ieee80211_recalc_txpower(link, update_txp_type); } return 0; } local->user_power_level = user_power_level; list_for_each_entry(sdata, 
&local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { has_monitor = true; continue; } for (int link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) continue; link->user_power_level = local->user_power_level; if (txp_type != link->conf->txpower_type) update_txp_type = true; link->conf->txpower_type = txp_type; } } list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; for (int link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) continue; ieee80211_recalc_txpower(link, update_txp_type); } } if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; ieee80211_recalc_txpower(&sdata->deflink, update_txp_type); } } if (local->emulate_chanctx && (old_power != local->user_power_level)) ieee80211_hw_conf_chan(local); return 0; } static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link_data; if (local->ops->get_txpower && (sdata->flags & IEEE80211_SDATA_IN_DRIVER)) return drv_get_txpower(local, sdata, link_id, dbm); if (local->emulate_chanctx) { *dbm = local->hw.conf.power_level; } else { link_data = wiphy_dereference(wiphy, sdata->link[link_id]); if (link_data) *dbm = link_data->conf->txpower; else return -ENOLINK; } /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) return -EINVAL; return 0; } static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); drv_rfkill_poll(local); } #ifdef CONFIG_NL80211_TESTMODE static int ieee80211_testmode_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_vif *vif = NULL; if (!local->ops->testmode_cmd) return -EOPNOTSUPP; if (wdev) { struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) vif = &sdata->vif; } return local->ops->testmode_cmd(&local->hw, vif, data, len); } static int ieee80211_testmode_dump(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->testmode_dump) return -EOPNOTSUPP; return local->ops->testmode_dump(&local->hw, skb, cb, data, len); } #endif int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode) { const u8 *ap; enum ieee80211_smps_mode old_req; int err; struct sta_info *sta; bool tdls_peer_found = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; /* The 
driver indicated that EML is enabled for the interface, which * implies that SMPS flows towards the AP should be stopped. */ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return 0; if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; /* * If not associated, or current association is not an HT * association, there's no need to do anything, just store * the new value until we associate. */ if (!sdata->u.mgd.associated || link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; tdls_peer_found = true; break; } rcu_read_unlock(); if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { if (tdls_peer_found || !sdata->u.mgd.powersave) smps_mode = IEEE80211_SMPS_OFF; else smps_mode = IEEE80211_SMPS_DYNAMIC; } /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, ap, ap, ieee80211_vif_is_mld(&sdata->vif) ? link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) ieee80211_teardown_tdls_peers(link); return err; } static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); unsigned int link_id; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); ieee80211_check_fast_rx_iface(sdata); return 0; } static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, s32 rssi_thold, u32 rssi_hyst, s32 rssi_low, s32 rssi_high) { struct ieee80211_bss_conf *conf; if (!link || !link->conf) return; conf = link->conf; if (rssi_thold && rssi_hyst && rssi_thold == conf->cqm_rssi_thold && rssi_hyst == conf->cqm_rssi_hyst) return; conf->cqm_rssi_thold = rssi_thold; conf->cqm_rssi_hyst = rssi_hyst; conf->cqm_rssi_low = rssi_low; conf->cqm_rssi_high = rssi_high; link->u.mgd.last_cqm_event_signal = 0; if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return; if (sdata->u.mgd.associated && (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM); } static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER && !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links 
*/ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst, 0, 0); } return 0; } static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_low, s32 rssi_high) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; int link_id; if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER) return -EOPNOTSUPP; /* For MLD, handle CQM change on all the active links */ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); ieee80211_set_cqm_rssi_link(sdata, link, 0, 0, rssi_low, rssi_high); } return 0; } static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i, ret; if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able * to send something, and if we're an AP we have to be able to do * so at a basic rate so that all clients can receive it. */ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && sdata->vif.bss_conf.chanreq.oper.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; enum nl80211_band band; band = sdata->vif.bss_conf.chanreq.oper.chan->band; if (!(mask->control[band].legacy & basic_rates)) return -EINVAL; } if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { ret = drv_set_bitrate_mask(local, sdata, mask); if (ret) return ret; } for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, sizeof(mask->control[i].ht_mcs)); memcpy(sdata->rc_rateidx_vht_mcs_mask[i], mask->control[i].vht_mcs, sizeof(mask->control[i].vht_mcs)); sdata->rc_has_mcs_mask[i] = false; sdata->rc_has_vht_mcs_mask[i] = false; if (!sband) continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) { sdata->rc_has_mcs_mask[i] = true; break; } } for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) { sdata->rc_has_vht_mcs_mask[i] = true; break; } } } return 0; } static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms, int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = *chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) return -ENOLINK; /* whatever, but channel contexts should not complain about that one */ link_data->smps_mode = IEEE80211_SMPS_OFF; link_data->needed_rx_chains = local->rx_chains; err = ieee80211_link_use_channel(link_data, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; wiphy_delayed_work_queue(wiphy, 
&link_data->dfs_cac_timer_work, msecs_to_jiffies(cac_time_ms)); return 0; } static void ieee80211_end_cac(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { link_data = sdata_dereference(sdata->link[link_id], sdata); if (!link_data) continue; wiphy_delayed_work_cancel(wiphy, &link_data->dfs_cac_timer_work); if (sdata->wdev.links[link_id].cac_started) { ieee80211_link_release_channel(link_data); sdata->wdev.links[link_id].cac_started = false; } } } static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { struct cfg80211_beacon_data *new_beacon; u8 *pos; int len; len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len + beacon->proberesp_ies_len + beacon->assocresp_ies_len + beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len; if (beacon->mbssid_ies) len += ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies, beacon->rnr_ies, beacon->mbssid_ies->cnt); new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL); if (!new_beacon) return NULL; if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { new_beacon->mbssid_ies = kzalloc(struct_size(new_beacon->mbssid_ies, elem, beacon->mbssid_ies->cnt), GFP_KERNEL); if (!new_beacon->mbssid_ies) { kfree(new_beacon); return NULL; } if (beacon->rnr_ies && beacon->rnr_ies->cnt) { new_beacon->rnr_ies = kzalloc(struct_size(new_beacon->rnr_ies, elem, beacon->rnr_ies->cnt), GFP_KERNEL); if (!new_beacon->rnr_ies) { kfree(new_beacon->mbssid_ies); kfree(new_beacon); return NULL; } } } pos = (u8 *)(new_beacon + 1); if (beacon->head_len) { new_beacon->head_len = beacon->head_len; new_beacon->head = pos; memcpy(pos, beacon->head, beacon->head_len); pos += beacon->head_len; } if (beacon->tail_len) { new_beacon->tail_len = beacon->tail_len; new_beacon->tail = pos; memcpy(pos, beacon->tail, beacon->tail_len); pos += beacon->tail_len; } if (beacon->beacon_ies_len) { new_beacon->beacon_ies_len = beacon->beacon_ies_len; new_beacon->beacon_ies = pos; memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len); pos += beacon->beacon_ies_len; } if (beacon->proberesp_ies_len) { new_beacon->proberesp_ies_len = beacon->proberesp_ies_len; new_beacon->proberesp_ies = pos; memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len); pos += beacon->proberesp_ies_len; } if (beacon->assocresp_ies_len) { new_beacon->assocresp_ies_len = beacon->assocresp_ies_len; new_beacon->assocresp_ies = pos; memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len); pos += beacon->assocresp_ies_len; } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) { pos += ieee80211_copy_mbssid_beacon(pos, new_beacon->mbssid_ies, beacon->mbssid_ies); if (beacon->rnr_ies && beacon->rnr_ies->cnt) pos += ieee80211_copy_rnr_beacon(pos, new_beacon->rnr_ies, beacon->rnr_ies); } /* might copy -1, meaning no changes requested */ new_beacon->ftm_responder = beacon->ftm_responder; if (beacon->lci) { new_beacon->lci_len = beacon->lci_len; new_beacon->lci = pos; memcpy(pos, beacon->lci, beacon->lci_len); pos += beacon->lci_len; } if (beacon->civicloc) { new_beacon->civicloc_len = beacon->civicloc_len; 
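		/*
		 * Like the other optional elements duplicated above, the civic
		 * location data lands in the single trailing allocation made
		 * for new_beacon; 'pos' walks through that buffer as each
		 * element is copied.
		 */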
new_beacon->civicloc = pos; memcpy(pos, beacon->civicloc, beacon->civicloc_len); pos += beacon->civicloc_len; } return new_beacon; } void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_link_data *link_data; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link_data = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link_data)) { rcu_read_unlock(); return; } /* TODO: MBSSID with MLO changes */ if (vif->mbssid_tx_vif == vif) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on * transmitting interface */ struct ieee80211_sub_if_data *iter; list_for_each_entry_rcu(iter, &local->interfaces, list) { if (!ieee80211_sdata_running(iter)) continue; if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; wiphy_work_queue(iter->local->hw.wiphy, &iter->deflink.csa.finalize_work); } } wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_csa_finish); void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: if (!link_data->u.ap.next_beacon) return -EINVAL; err = ieee80211_assign_beacon(sdata, link_data, link_data->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link_data); if (err < 0) return err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata, changed); if (err < 0) return err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata, changed); if (err < 0) return err; break; #endif default: WARN_ON(1); return -EINVAL; } return 0; } static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf = link_data->conf; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later * with multi-vif. 
once reservation is complete it will re-schedule the * work with no reserved_chanctx so verify chandef to check if it * completed successfully */ if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their * reservations */ if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(link_data); } if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, &link_data->csa.chanreq.oper)) return -EINVAL; link_conf->csa_active = false; err = ieee80211_set_after_csa_beacon(link_data, &changed); if (err) return err; ieee80211_link_info_change_notify(sdata, link_data, changed); ieee80211_vif_unblock_queues_csa(sdata); err = drv_post_channel_switch(link_data); if (err) return err; cfg80211_ch_switch_notify(sdata->dev, &link_data->csa.chanreq.oper, link_data->link_id); return 0; } static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n", link_data->link_id); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } } void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, csa.finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!link->conf->csa_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data, struct cfg80211_csa_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link_data->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after); if (!link_data->u.ap.next_beacon) return -ENOMEM; /* * With a count of 0, we don't have to wait for any * TBTT before switching, so complete the CSA * immediately. In theory, with a count == 1 we * should delay the switch until just before the next * TBTT, but that would complicate things so we switch * immediately too. If we would delay the switch * until the next TBTT, we would have to set the probe * response here. * * TODO: A channel switch with count <= 1 without * sending a CSA action frame is kind of useless, * because the clients won't know we're changing * channels. The action frame must be implemented * either here or in the userspace. 
*/ if (params->count <= 1) break; if ((params->n_counter_offsets_beacon > IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { ieee80211_free_next_beacon(link_data); return -EINVAL; } csa.counter_offsets_beacon = params->counter_offsets_beacon; csa.counter_offsets_presp = params->counter_offsets_presp; csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon; csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; err = ieee80211_assign_beacon(sdata, link_data, &params->beacon_csa, &csa, NULL, changed); if (err < 0) { ieee80211_free_next_beacon(link_data); return err; } break; case NL80211_IFTYPE_ADHOC: if (!sdata->vif.cfg.ibss_joined) return -EINVAL; if (params->chandef.width != sdata->u.ibss.chandef.width) return -EINVAL; switch (params->chandef.width) { case NL80211_CHAN_WIDTH_40: if (cfg80211_get_chandef_type(&params->chandef) != cfg80211_get_chandef_type(&sdata->u.ibss.chandef)) return -EINVAL; break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: break; default: return -EINVAL; } /* changes into another band are not supported */ if (sdata->u.ibss.chandef.chan->band != params->chandef.chan->band) return -EINVAL; /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_ibss_csa_beacon(sdata, params, changed); if (err < 0) return err; } ieee80211_send_action_csa(sdata, params); break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* changes into another band are not supported */ if (sdata->vif.bss_conf.chanreq.oper.chan->band != params->chandef.chan->band) return -EINVAL; if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_NONE) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_INIT; if (!ifmsh->pre_value) ifmsh->pre_value = 1; else ifmsh->pre_value++; } /* see comments in the NL80211_IFTYPE_AP block */ if (params->count > 1) { err = ieee80211_mesh_csa_beacon(sdata, params, changed); if (err < 0) { ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; return err; } } if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) ieee80211_send_action_csa(sdata, params); break; } #endif default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_abort(struct ieee80211_link_data *link) { link->conf->color_change_active = false; ieee80211_free_next_beacon(link); cfg80211_color_change_aborted_notify(link->sdata->dev, link->link_id); } static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_channel_switch ch_switch = { .link_id = params->link_id, }; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link_data; u64 changed = 0; u8 link_id = params->link_id; int err; lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; if (sdata->wdev.links[link_id].cac_started) return -EBUSY; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link_data = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link_data) return -ENOLINK; link_conf = link_data->conf; if (chanreq.oper.punctured && !link_conf->eht_support) return -EINVAL; /* don't 
allow another channel switch if one is already active. */ if (link_conf->csa_active) return -EBUSY; conf = wiphy_dereference(wiphy, link_conf->chanctx_conf); if (!conf) { err = -EBUSY; goto out; } if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ err = -EOPNOTSUPP; goto out; } chanctx = container_of(conf, struct ieee80211_chanctx, conf); ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; ch_switch.block_tx = params->block_tx; ch_switch.chandef = chanreq.oper; ch_switch.count = params->count; err = drv_pre_channel_switch(sdata, &ch_switch); if (err) goto out; err = ieee80211_link_reserve_chanctx(link_data, &chanreq, chanctx->mode, params->radar_required); if (err) goto out; /* if reservation is invalid then this will fail */ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0, -1); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } /* if there is a color change in progress, abort it */ if (link_conf->color_change_active) ieee80211_color_change_abort(link_data); err = ieee80211_set_csa_beacon(link_data, params, &changed); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; } link_data->csa.chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx) ieee80211_vif_block_queues_csa(sdata); cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa.chanreq.oper, link_id, params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, link_data, changed); drv_channel_switch_beacon(sdata, &link_data->csa.chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(link_data); } out: return err; } int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; /* wow, you wrapped 64 bits ... 
more likely a bug */ if (WARN_ON(local->roc_cookie_counter == 0)) local->roc_cookie_counter++; return local->roc_cookie_counter; } int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; int id; ack_skb = skb_copy(skb, gfp); if (!ack_skb) return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { kfree_skb(ack_skb); return -ENOMEM; } IEEE80211_SKB_CB(skb)->status_data_idr = 1; IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; return 0; } static void ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4); bool global_change, intf_change; global_change = (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) || (local->rx_mcast_action_reg != !!(upd->global_mcast_stypes & action_mask)); local->probe_req_reg = upd->global_stypes & preq_mask; local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask; intf_change = (sdata->vif.probe_req_reg != !!(upd->interface_stypes & preq_mask)) || (sdata->vif.rx_mcast_action_reg != !!(upd->interface_mcast_stypes & action_mask)); sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask; sdata->vif.rx_mcast_action_reg = upd->interface_mcast_stypes & action_mask; if (!local->open_count) return; if (intf_change && ieee80211_sdata_running(sdata)) drv_config_iface_filter(local, sdata, sdata->vif.probe_req_reg ? 
FIF_PROBE_REQ : 0, FIF_PROBE_REQ); if (global_change) ieee80211_configure_filter(local); } static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); int ret; if (local->started) return -EOPNOTSUPP; ret = drv_set_antenna(local, tx_ant, rx_ant); if (ret) return ret; local->rx_chains = hweight8(rx_ant); return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); return drv_get_antenna(local, tx_ant, rx_ant); } static int ieee80211_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!local->ops->set_rekey_data) return -EOPNOTSUPP; drv_set_rekey_data(local, sdata, data); return 0; } static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; int ret; /* the lock is needed to assign the cookie later */ lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); if (!sta) { ret = -ENOLINK; goto unlock; } qos = sta->sta.wme; chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { ret = -EINVAL; goto unlock; } band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) { ret = -ENOMEM; goto unlock; } skb->dev = dev; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); if (ret) { kfree_skb(skb); goto unlock; } local_bh_disable(); ieee80211_xmit(sdata, sta, skb); local_bh_enable(); ret = 0; unlock: rcu_read_unlock(); return ret; } static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_link_data *link; int ret = -ENODATA; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (!link) { ret = -ENOLINK; goto out; } chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; } else if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && local->open_count > 0 && 
local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; ret = 0; } out: rcu_read_unlock(); return ret; } #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { drv_set_wakeup(wiphy_priv(wiphy), enabled); } #endif static int ieee80211_set_qos_map(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mac80211_qos_map *new_qos_map, *old_qos_map; if (qos_map) { new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); if (!new_qos_map) return -ENOMEM; memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); } else { /* A NULL qos_map was passed to disable QoS mapping */ new_qos_map = NULL; } old_qos_map = sdata_dereference(sdata->qos_map, sdata); rcu_assign_pointer(sdata->qos_map, new_qos_map); if (old_qos_map) kfree_rcu(old_qos_map, rcu_head); return 0; } static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; u64 changed = 0; link = sdata_dereference(sdata->link[link_id], sdata); ret = ieee80211_link_change_chanreq(link, &chanreq, &changed); if (ret == 0) ieee80211_link_info_change_notify(sdata, link, changed); return ret; } static int ieee80211_add_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer, u8 up, u16 admitted_time) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ac = ieee802_1d_to_ac[up]; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(sdata->wmm_acm & BIT(up))) return -EINVAL; if (ifmgd->tx_tspec[ac].admitted_time) return -EBUSY; if (admitted_time) { ifmgd->tx_tspec[ac].admitted_time = 32 * admitted_time; ifmgd->tx_tspec[ac].tsid = tsid; ifmgd->tx_tspec[ac].up = up; } return 0; } static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = wiphy_priv(wiphy); int ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; /* skip unused entries */ if (!tx_tspec->admitted_time) continue; if (tx_tspec->tsid != tsid) continue; /* due to this new packets will be reassigned to non-ACM ACs */ tx_tspec->up = -1; /* Make sure that all packets have been sent to avoid to * restore the QoS params on packets that are still on the * queues. 
		 */
		synchronize_net();
		ieee80211_flush_queues(local, sdata, false);

		/*
		 * restore the normal QoS parameters
		 * (unconditionally to avoid races)
		 */
		tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE;
		tx_tspec->downgraded = false;
		ieee80211_sta_handle_tspec_ac_params(sdata);

		/* finally clear all the data */
		memset(tx_tspec, 0, sizeof(*tx_tspec));

		return 0;
	}

	return -ENOENT;
}

void ieee80211_nan_func_terminated(struct ieee80211_vif *vif,
				   u8 inst_id,
				   enum nl80211_nan_func_term_reason reason,
				   gfp_t gfp)
{
	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
	struct cfg80211_nan_func *func;
	u64 cookie;

	if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
		return;

	spin_lock_bh(&sdata->u.nan.func_lock);

	func = idr_find(&sdata->u.nan.function_inst_ids, inst_id);
	if (WARN_ON(!func)) {
		spin_unlock_bh(&sdata->u.nan.func_lock);
		return;
	}

	cookie = func->cookie;
	idr_remove(&sdata->u.nan.function_inst_ids, inst_id);

	spin_unlock_bh(&sdata->u.nan.func_lock);

	cfg80211_free_nan_func(func);

	cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id,
				     reason, cookie, gfp);
}
EXPORT_SYMBOL(ieee80211_nan_func_terminated);

void ieee80211_nan_func_match(struct ieee80211_vif *vif,
			      struct cfg80211_nan_match_params *match,
			      gfp_t gfp)
{
	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
	struct cfg80211_nan_func *func;

	if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
		return;

	spin_lock_bh(&sdata->u.nan.func_lock);

	func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id);
	if (WARN_ON(!func)) {
		spin_unlock_bh(&sdata->u.nan.func_lock);
		return;
	}
	match->cookie = func->cookie;

	spin_unlock_bh(&sdata->u.nan.func_lock);

	cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp);
}
EXPORT_SYMBOL(ieee80211_nan_func_match);

static int ieee80211_set_multicast_to_unicast(struct wiphy *wiphy,
					      struct net_device *dev,
					      const bool enabled)
{
	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);

	sdata->u.ap.multicast_to_unicast = enabled;

	return 0;
}

void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats,
			      struct txq_info *txqi)
{
	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_BYTES))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_BYTES);
		txqstats->backlog_bytes = txqi->tin.backlog_bytes;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS);
		txqstats->backlog_packets = txqi->tin.backlog_packets;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_FLOWS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_FLOWS);
		txqstats->flows = txqi->tin.flows;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_DROPS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_DROPS);
		txqstats->drops = txqi->cstats.drop_count;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_ECN_MARKS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_ECN_MARKS);
		txqstats->ecn_marks = txqi->cstats.ecn_mark;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_OVERLIMIT))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_OVERLIMIT);
		txqstats->overlimit = txqi->tin.overlimit;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_COLLISIONS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_COLLISIONS);
		txqstats->collisions = txqi->tin.collisions;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_BYTES))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_BYTES);
		txqstats->tx_bytes = txqi->tin.tx_bytes;
	}

	if (!(txqstats->filled & BIT(NL80211_TXQ_STATS_TX_PACKETS))) {
		txqstats->filled |= BIT(NL80211_TXQ_STATS_TX_PACKETS);
		txqstats->tx_packets = txqi->tin.tx_packets;
	}
}

static int
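/*
 * TXQ statistics: ieee80211_fill_txq_stats() above only fills fields the
 * caller has not already marked in ->filled, so previously provided
 * counters are never overwritten. The op that follows reports either the
 * per-interface TXQ counters (when a wdev is given) or the PHY-wide fq
 * totals.
 */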
ieee80211_get_txq_stats(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; int ret = 0; spin_lock_bh(&local->fq.lock); rcu_read_lock(); if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (!sdata->vif.txq) { ret = 1; goto out; } ieee80211_fill_txq_stats(txqstats, to_txq_info(sdata->vif.txq)); } else { /* phy stats */ txqstats->filled |= BIT(NL80211_TXQ_STATS_BACKLOG_PACKETS) | BIT(NL80211_TXQ_STATS_BACKLOG_BYTES) | BIT(NL80211_TXQ_STATS_OVERLIMIT) | BIT(NL80211_TXQ_STATS_OVERMEMORY) | BIT(NL80211_TXQ_STATS_COLLISIONS) | BIT(NL80211_TXQ_STATS_MAX_FLOWS); txqstats->backlog_packets = local->fq.backlog; txqstats->backlog_bytes = local->fq.memory_usage; txqstats->overlimit = local->fq.overlimit; txqstats->overmemory = local->fq.overmemory; txqstats->collisions = local->fq.collisions; txqstats->max_flows = local->fq.flows_cnt; } out: rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); return ret; } static int ieee80211_get_ftm_responder_stats(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); return drv_get_ftm_responder_stats(local, sdata, ftm_stats); } static int ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_start_pmsr(local, sdata, request); } static void ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, struct cfg80211_pmsr_request *request) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev); return drv_abort_pmsr(local, sdata, request); } static int ieee80211_set_tid_config(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_tid_config *tid_conf) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; if (!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); sta = sta_info_get_bss(sdata, tid_conf->peer); if (!sta) return -ENOENT; return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); sta = sta_info_get_bss(sdata, peer); if (!sta) return -ENOENT; return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, struct cfg80211_sar_specs *sar) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_sar_specs) return -EOPNOTSUPP; return local->ops->set_sar_specs(&local->hw, sar); } static int ieee80211_set_after_color_change_beacon(struct ieee80211_link_data *link, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: { int ret; if (!link->u.ap.next_beacon) return -EINVAL; ret = ieee80211_assign_beacon(sdata, link, 
link->u.ap.next_beacon, NULL, NULL, changed); ieee80211_free_next_beacon(link); if (ret < 0) return ret; break; } default: WARN_ON_ONCE(1); return -EINVAL; } return 0; } static int ieee80211_set_color_change_beacon(struct ieee80211_link_data *link, struct cfg80211_color_change_settings *params, u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_color_change_settings color_change = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: link->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_next); if (!link->u.ap.next_beacon) return -ENOMEM; if (params->count <= 1) break; color_change.counter_offset_beacon = params->counter_offset_beacon; color_change.counter_offset_presp = params->counter_offset_presp; color_change.count = params->count; err = ieee80211_assign_beacon(sdata, link, &params->beacon_color_change, NULL, &color_change, changed); if (err < 0) { ieee80211_free_next_beacon(link); return err; } break; default: return -EOPNOTSUPP; } return 0; } static void ieee80211_color_change_bss_config_notify(struct ieee80211_link_data *link, u8 color, int enable, u64 changed) { struct ieee80211_sub_if_data *sdata = link->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); link->conf->he_bss_color.color = color; link->conf->he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_link_info_change_notify(sdata, link, changed); if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; child->vif.bss_conf.he_bss_color.enabled = enable; ieee80211_link_info_change_notify(child, &child->deflink, BSS_CHANGED_HE_BSS_COLOR); } } } } static int ieee80211_color_change_finalize(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); link->conf->color_change_active = false; err = ieee80211_set_after_color_change_beacon(link, &changed); if (err) { cfg80211_color_change_aborted_notify(sdata->dev, link->link_id); return err; } ieee80211_color_change_bss_config_notify(link, link->conf->color_change_color, 1, changed); cfg80211_color_change_notify(sdata->dev, link->link_id); return 0; } void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, color_change_finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *link_conf = link->conf; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!link_conf->color_change_active) return; if (!ieee80211_sdata_running(sdata)) return; ieee80211_color_change_finalize(link); } void ieee80211_color_collision_detection_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, color_collision_detect_work.work); struct ieee80211_sub_if_data *sdata = link->sdata; cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap, link->link_id); } void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } wiphy_work_queue(sdata->local->hw.wiphy, &link->color_change_finalize_work); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, u64 color_bitmap, u8 link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link; if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return; rcu_read_lock(); link = rcu_dereference(sdata->link[link_id]); if (WARN_ON(!link)) { rcu_read_unlock(); return; } if (link->conf->color_change_active || link->conf->csa_active) { rcu_read_unlock(); return; } if (wiphy_delayed_work_pending(sdata->local->hw.wiphy, &link->color_collision_detect_work)) { rcu_read_unlock(); return; } link->color_bitmap = color_bitmap; /* queue the color collision detection event every 500 ms in order to * avoid sending too much netlink messages to userspace. */ wiphy_delayed_work_queue(sdata->local->hw.wiphy, &link->color_collision_detect_work, msecs_to_jiffies(500)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify); static int ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_bss_conf *link_conf; struct ieee80211_link_data *link; u8 link_id = params->link_id; u64 changed = 0; int err; lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) return -EINVAL; link = wiphy_dereference(wiphy, sdata->link[link_id]); if (!link) return -ENOLINK; link_conf = link->conf; if (link_conf->nontransmitted) return -EINVAL; /* don't allow another color change if one is already active or if csa * is active */ if (link_conf->color_change_active || link_conf->csa_active) { err = -EBUSY; goto out; } err = ieee80211_set_color_change_beacon(link, params, &changed); if (err) goto out; link_conf->color_change_active = true; link_conf->color_change_color = params->color; cfg80211_color_change_started_notify(sdata->dev, params->count, link_id); if (changed) ieee80211_color_change_bss_config_notify(link, 0, 0, changed); else /* if the beacon didn't change, we can finalize immediately */ ieee80211_color_change_finalize(link); out: return err; } static int ieee80211_set_radar_background(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); if (!local->ops->set_radar_background) return -EOPNOTSUPP; return local->ops->set_radar_background(&local->hw, chandef); } static int ieee80211_add_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct 
ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); u16 new_links = wdev->valid_links & ~BIT(link_id); lockdep_assert_wiphy(sdata->local->hw.wiphy); /* During the link teardown process, certain functions require the * link_id to remain in the valid_links bitmap. Therefore, instead * of removing the link_id from the bitmap, pass a masked value to * simulate as if link_id does not exist anymore. */ ieee80211_vif_set_links(sdata, new_links, 0); } static int ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; int ret; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!sta->sta.valid_links) return -EINVAL; if (sta->sta.valid_links & BIT(params->link_id)) return -EALREADY; ret = ieee80211_sta_allocate_link(sta, params->link_id); if (ret) return ret; ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_NEW, params); if (ret) { ieee80211_sta_free_link(sta, params->link_id); return ret; } if (test_sta_flag(sta, WLAN_STA_ASSOC)) { struct link_sta_info *link_sta; link_sta = sdata_dereference(sta->link[params->link_id], sdata); rate_control_rate_init(link_sta); } /* ieee80211_sta_activate_link frees the link upon failure */ return ieee80211_sta_activate_link(sta, params->link_id); } static int ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; return sta_link_apply_parameters(local, sta, STA_LINK_MODE_LINK_MODIFY, params); } static int ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; /* must not create a STA without links */ if (sta->sta.valid_links == BIT(params->link_id)) return -EINVAL; ieee80211_sta_remove_link(sta, params->link_id); return 0; } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; if (!local->ops->set_hw_timestamp) return -EOPNOTSUPP; if (!check_sdata_in_driver(sdata)) return -EIO; return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); } static int ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); 
return ieee80211_req_neg_ttlm(sdata, params); } static int ieee80211_assoc_ml_reconf(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_link *add_links, u16 rem_links) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); lockdep_assert_wiphy(sdata->local->hw.wiphy); return ieee80211_mgd_assoc_ml_reconf(sdata, add_links, rem_links); } const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, .change_virtual_intf = ieee80211_change_iface, .start_p2p_device = ieee80211_start_p2p_device, .stop_p2p_device = ieee80211_stop_p2p_device, .add_key = ieee80211_add_key, .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, .set_default_mgmt_key = ieee80211_config_default_mgmt_key, .set_default_beacon_key = ieee80211_config_default_beacon_key, .start_ap = ieee80211_start_ap, .change_beacon = ieee80211_change_beacon, .stop_ap = ieee80211_stop_ap, .add_station = ieee80211_add_station, .del_station = ieee80211_del_station, .change_station = ieee80211_change_station, .get_station = ieee80211_get_station, .dump_station = ieee80211_dump_station, .dump_survey = ieee80211_dump_survey, #ifdef CONFIG_MAC80211_MESH .add_mpath = ieee80211_add_mpath, .del_mpath = ieee80211_del_mpath, .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, .get_mpp = ieee80211_get_mpp, .dump_mpp = ieee80211_dump_mpp, .update_mesh_config = ieee80211_update_mesh_config, .get_mesh_config = ieee80211_get_mesh_config, .join_mesh = ieee80211_join_mesh, .leave_mesh = ieee80211_leave_mesh, #endif .join_ocb = ieee80211_join_ocb, .leave_ocb = ieee80211_leave_ocb, .change_bss = ieee80211_change_bss, .inform_bss = ieee80211_inform_bss, .set_txq_params = ieee80211_set_txq_params, .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, .assoc = ieee80211_assoc, .deauth = ieee80211_deauth, .disassoc = ieee80211_disassoc, .join_ibss = ieee80211_join_ibss, .leave_ibss = ieee80211_leave_ibss, .set_mcast_rate = ieee80211_set_mcast_rate, .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, .update_mgmt_frame_registrations = ieee80211_update_mgmt_frame_registrations, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, .set_rekey_data = ieee80211_set_rekey_data, .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .tdls_channel_switch = ieee80211_tdls_channel_switch, .tdls_cancel_channel_switch = ieee80211_tdls_cancel_channel_switch, .probe_client = ieee80211_probe_client, .set_noack_map = ieee80211_set_noack_map, #ifdef CONFIG_PM .set_wakeup = ieee80211_set_wakeup, 
#endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, .add_nan_func = ieee80211_add_nan_func, .del_nan_func = ieee80211_del_nan_func, .set_multicast_to_unicast = ieee80211_set_multicast_to_unicast, .tx_control_port = ieee80211_tx_control_port, .get_txq_stats = ieee80211_get_txq_stats, .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats, .start_pmsr = ieee80211_start_pmsr, .abort_pmsr = ieee80211_abort_pmsr, .probe_mesh_link = ieee80211_probe_mesh_link, .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, .set_radar_background = ieee80211_set_radar_background, .add_intf_link = ieee80211_add_intf_link, .del_intf_link = ieee80211_del_intf_link, .add_link_station = ieee80211_add_link_station, .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, .set_hw_timestamp = ieee80211_set_hw_timestamp, .set_ttlm = ieee80211_set_ttlm, .get_radio_mask = ieee80211_get_radio_mask, .assoc_ml_reconf = ieee80211_assoc_ml_reconf, };
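Many of the cfg80211 handlers wired up in the table above share one shape: resolve the mac80211-private data from the wiphy, bail out with -EOPNOTSUPP when the underlying driver does not implement the matching op, and otherwise forward the request (see ieee80211_set_sar_specs, ieee80211_set_radar_background, ieee80211_set_hw_timestamp above). A minimal sketch of that shape, using hypothetical names (example_set_foo, struct example_foo_cfg and the set_foo driver op are not real mac80211 symbols):

/* Hypothetical sketch, not part of mac80211: the common
 * "check for the driver op, then forward" pattern.
 */
static int example_set_foo(struct wiphy *wiphy, struct example_foo_cfg *cfg)
{
	struct ieee80211_local *local = wiphy_priv(wiphy);

	/* refuse early if the low-level driver has no handler */
	if (!local->ops->set_foo)	/* hypothetical driver op */
		return -EOPNOTSUPP;

	return local->ops->set_foo(&local->hw, cfg);
}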
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2014 Fraunhofer ITWM
 *
 * Written by:
 * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
 */

#include <linux/err.h>
#include <linux/bug.h>
#include <linux/completion.h>
#include <linux/ieee802154.h>
#include <linux/rculist.h>
#include <crypto/aead.h>
#include <crypto/skcipher.h>

#include "ieee802154_i.h"
#include "llsec.h"

static void llsec_key_put(struct mac802154_llsec_key *key);
static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
			       const struct ieee802154_llsec_key_id *b);

static void llsec_dev_free(struct mac802154_llsec_device *dev);

void mac802154_llsec_init(struct mac802154_llsec *sec)
{
	memset(sec, 0, sizeof(*sec));

	memset(&sec->params.default_key_source, 0xFF, IEEE802154_ADDR_LEN);

	INIT_LIST_HEAD(&sec->table.security_levels);
	INIT_LIST_HEAD(&sec->table.devices);
	INIT_LIST_HEAD(&sec->table.keys);
	hash_init(sec->devices_short);
	hash_init(sec->devices_hw);
	rwlock_init(&sec->lock);
}

void mac802154_llsec_destroy(struct mac802154_llsec *sec)
{
	struct ieee802154_llsec_seclevel *sl, *sn;
	struct ieee802154_llsec_device *dev, *dn;
	struct ieee802154_llsec_key_entry *key, *kn;

	list_for_each_entry_safe(sl, sn, &sec->table.security_levels, list) {
		struct mac802154_llsec_seclevel *msl;

		msl = container_of(sl, struct mac802154_llsec_seclevel, level);
		list_del(&sl->list);
		kfree_sensitive(msl);
	}

	list_for_each_entry_safe(dev, dn, &sec->table.devices, list) {
		struct mac802154_llsec_device *mdev;

		mdev = container_of(dev, struct mac802154_llsec_device, dev);
		list_del(&dev->list);
		llsec_dev_free(mdev);
	}

	list_for_each_entry_safe(key, kn, &sec->table.keys, list) {
		struct mac802154_llsec_key *mkey;

		mkey = container_of(key->key, struct mac802154_llsec_key, key);
		list_del(&key->list);
		llsec_key_put(mkey);
		kfree_sensitive(key);
	}
}

int mac802154_llsec_get_params(struct mac802154_llsec *sec,
			       struct ieee802154_llsec_params *params)
{
	read_lock_bh(&sec->lock);
	*params = sec->params;
	read_unlock_bh(&sec->lock);

	return 0;
}

int mac802154_llsec_set_params(struct mac802154_llsec *sec,
			       const struct ieee802154_llsec_params *params,
			       int changed)
{
	write_lock_bh(&sec->lock);

	if (changed & IEEE802154_LLSEC_PARAM_ENABLED)
		sec->params.enabled = params->enabled;
	if (changed & IEEE802154_LLSEC_PARAM_FRAME_COUNTER)
		sec->params.frame_counter = params->frame_counter;
	if (changed & IEEE802154_LLSEC_PARAM_OUT_LEVEL)
		sec->params.out_level = params->out_level;
	if (changed & IEEE802154_LLSEC_PARAM_OUT_KEY)
		sec->params.out_key = params->out_key;
	if (changed & IEEE802154_LLSEC_PARAM_KEY_SOURCE)
		sec->params.default_key_source = params->default_key_source;
	if (changed & IEEE802154_LLSEC_PARAM_PAN_ID)
		sec->params.pan_id = params->pan_id;
	if (changed &
IEEE802154_LLSEC_PARAM_HWADDR) sec->params.hwaddr = params->hwaddr; if (changed & IEEE802154_LLSEC_PARAM_COORD_HWADDR) sec->params.coord_hwaddr = params->coord_hwaddr; if (changed & IEEE802154_LLSEC_PARAM_COORD_SHORTADDR) sec->params.coord_shortaddr = params->coord_shortaddr; write_unlock_bh(&sec->lock); return 0; } static struct mac802154_llsec_key* llsec_key_alloc(const struct ieee802154_llsec_key *template) { const int authsizes[3] = { 4, 8, 16 }; struct mac802154_llsec_key *key; int i; key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) return NULL; kref_init(&key->ref); key->key = *template; BUILD_BUG_ON(ARRAY_SIZE(authsizes) != ARRAY_SIZE(key->tfm)); for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(key->tfm[i])) goto err_tfm; if (crypto_aead_setkey(key->tfm[i], template->key, IEEE802154_LLSEC_KEY_SIZE)) goto err_tfm; if (crypto_aead_setauthsize(key->tfm[i], authsizes[i])) goto err_tfm; } key->tfm0 = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0); if (IS_ERR(key->tfm0)) goto err_tfm; if (crypto_sync_skcipher_setkey(key->tfm0, template->key, IEEE802154_LLSEC_KEY_SIZE)) goto err_tfm0; return key; err_tfm0: crypto_free_sync_skcipher(key->tfm0); err_tfm: for (i = 0; i < ARRAY_SIZE(key->tfm); i++) if (!IS_ERR_OR_NULL(key->tfm[i])) crypto_free_aead(key->tfm[i]); kfree_sensitive(key); return NULL; } static void llsec_key_release(struct kref *ref) { struct mac802154_llsec_key *key; int i; key = container_of(ref, struct mac802154_llsec_key, ref); for (i = 0; i < ARRAY_SIZE(key->tfm); i++) crypto_free_aead(key->tfm[i]); crypto_free_sync_skcipher(key->tfm0); kfree_sensitive(key); } static struct mac802154_llsec_key* llsec_key_get(struct mac802154_llsec_key *key) { kref_get(&key->ref); return key; } static void llsec_key_put(struct mac802154_llsec_key *key) { kref_put(&key->ref, llsec_key_release); } static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a, const struct ieee802154_llsec_key_id *b) { if (a->mode != b->mode) return false; if (a->mode == IEEE802154_SCF_KEY_IMPLICIT) return ieee802154_addr_equal(&a->device_addr, &b->device_addr); if (a->id != b->id) return false; switch (a->mode) { case IEEE802154_SCF_KEY_INDEX: return true; case IEEE802154_SCF_KEY_SHORT_INDEX: return a->short_source == b->short_source; case IEEE802154_SCF_KEY_HW_INDEX: return a->extended_source == b->extended_source; } return false; } int mac802154_llsec_key_add(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *id, const struct ieee802154_llsec_key *key) { struct mac802154_llsec_key *mkey = NULL; struct ieee802154_llsec_key_entry *pos, *new; if (!(key->frame_types & (1 << IEEE802154_FC_TYPE_MAC_CMD)) && key->cmd_frame_ids) return -EINVAL; list_for_each_entry(pos, &sec->table.keys, list) { if (llsec_key_id_equal(&pos->id, id)) return -EEXIST; if (memcmp(pos->key->key, key->key, IEEE802154_LLSEC_KEY_SIZE)) continue; mkey = container_of(pos->key, struct mac802154_llsec_key, key); /* Don't allow multiple instances of the same AES key to have * different allowed frame types/command frame ids, as this is * not possible in the 802.15.4 PIB. 
*/ if (pos->key->frame_types != key->frame_types || pos->key->cmd_frame_ids != key->cmd_frame_ids) return -EEXIST; break; } new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; if (!mkey) mkey = llsec_key_alloc(key); else mkey = llsec_key_get(mkey); if (!mkey) goto fail; new->id = *id; new->key = &mkey->key; list_add_rcu(&new->list, &sec->table.keys); return 0; fail: kfree_sensitive(new); return -ENOMEM; } static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) { struct ieee802154_llsec_key_entry *pos; struct mac802154_llsec_key *mkey; pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); mkey = container_of(pos->key, struct mac802154_llsec_key, key); llsec_key_put(mkey); kfree_sensitive(pos); } int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } return -ENOENT; } static bool llsec_dev_use_shortaddr(__le16 short_addr) { return short_addr != cpu_to_le16(IEEE802154_ADDR_UNDEF) && short_addr != cpu_to_le16(0xffff); } static u32 llsec_dev_hash_short(__le16 short_addr, __le16 pan_id) { return ((__force u16)short_addr) << 16 | (__force u16)pan_id; } static u64 llsec_dev_hash_long(__le64 hwaddr) { return (__force u64)hwaddr; } static struct mac802154_llsec_device* llsec_dev_find_short(struct mac802154_llsec *sec, __le16 short_addr, __le16 pan_id) { struct mac802154_llsec_device *dev; u32 key = llsec_dev_hash_short(short_addr, pan_id); hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) { if (dev->dev.short_addr == short_addr && dev->dev.pan_id == pan_id) return dev; } return NULL; } static struct mac802154_llsec_device* llsec_dev_find_long(struct mac802154_llsec *sec, __le64 hwaddr) { struct mac802154_llsec_device *dev; u64 key = llsec_dev_hash_long(hwaddr); hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) { if (dev->dev.hwaddr == hwaddr) return dev; } return NULL; } static void llsec_dev_free(struct mac802154_llsec_device *dev) { struct ieee802154_llsec_device_key *pos, *pn; struct mac802154_llsec_device_key *devkey; list_for_each_entry_safe(pos, pn, &dev->dev.keys, list) { devkey = container_of(pos, struct mac802154_llsec_device_key, devkey); list_del(&pos->list); kfree_sensitive(devkey); } kfree_sensitive(dev); } int mac802154_llsec_dev_add(struct mac802154_llsec *sec, const struct ieee802154_llsec_device *dev) { struct mac802154_llsec_device *entry; u32 skey = llsec_dev_hash_short(dev->short_addr, dev->pan_id); u64 hwkey = llsec_dev_hash_long(dev->hwaddr); BUILD_BUG_ON(sizeof(hwkey) != IEEE802154_ADDR_LEN); if ((llsec_dev_use_shortaddr(dev->short_addr) && llsec_dev_find_short(sec, dev->short_addr, dev->pan_id)) || llsec_dev_find_long(sec, dev->hwaddr)) return -EEXIST; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->dev = *dev; spin_lock_init(&entry->lock); INIT_LIST_HEAD(&entry->dev.keys); if (llsec_dev_use_shortaddr(dev->short_addr)) hash_add_rcu(sec->devices_short, &entry->bucket_s, skey); else INIT_HLIST_NODE(&entry->bucket_s); hash_add_rcu(sec->devices_hw, &entry->bucket_hw, hwkey); list_add_tail_rcu(&entry->dev.list, &sec->table.devices); return 0; } static void llsec_dev_free_rcu(struct rcu_head *rcu) { llsec_dev_free(container_of(rcu, struct mac802154_llsec_device, rcu)); } int mac802154_llsec_dev_del(struct mac802154_llsec 
*sec, __le64 device_addr) { struct mac802154_llsec_device *pos; pos = llsec_dev_find_long(sec, device_addr); if (!pos) return -ENOENT; hash_del_rcu(&pos->bucket_s); hash_del_rcu(&pos->bucket_hw); list_del_rcu(&pos->dev.list); call_rcu(&pos->rcu, llsec_dev_free_rcu); return 0; } static struct mac802154_llsec_device_key* llsec_devkey_find(struct mac802154_llsec_device *dev, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_device_key *devkey; list_for_each_entry_rcu(devkey, &dev->dev.keys, list) { if (!llsec_key_id_equal(key, &devkey->key_id)) continue; return container_of(devkey, struct mac802154_llsec_device_key, devkey); } return NULL; } int mac802154_llsec_devkey_add(struct mac802154_llsec *sec, __le64 dev_addr, const struct ieee802154_llsec_device_key *key) { struct mac802154_llsec_device *dev; struct mac802154_llsec_device_key *devkey; dev = llsec_dev_find_long(sec, dev_addr); if (!dev) return -ENOENT; if (llsec_devkey_find(dev, &key->key_id)) return -EEXIST; devkey = kmalloc(sizeof(*devkey), GFP_KERNEL); if (!devkey) return -ENOMEM; devkey->devkey = *key; list_add_tail_rcu(&devkey->devkey.list, &dev->dev.keys); return 0; } int mac802154_llsec_devkey_del(struct mac802154_llsec *sec, __le64 dev_addr, const struct ieee802154_llsec_device_key *key) { struct mac802154_llsec_device *dev; struct mac802154_llsec_device_key *devkey; dev = llsec_dev_find_long(sec, dev_addr); if (!dev) return -ENOENT; devkey = llsec_devkey_find(dev, &key->key_id); if (!devkey) return -ENOENT; list_del_rcu(&devkey->devkey.list); kfree_rcu(devkey, rcu); return 0; } static struct mac802154_llsec_seclevel* llsec_find_seclevel(const struct mac802154_llsec *sec, const struct ieee802154_llsec_seclevel *sl) { struct ieee802154_llsec_seclevel *pos; list_for_each_entry(pos, &sec->table.security_levels, list) { if (pos->frame_type != sl->frame_type || (pos->frame_type == IEEE802154_FC_TYPE_MAC_CMD && pos->cmd_frame_id != sl->cmd_frame_id) || pos->device_override != sl->device_override || pos->sec_levels != sl->sec_levels) continue; return container_of(pos, struct mac802154_llsec_seclevel, level); } return NULL; } int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec, const struct ieee802154_llsec_seclevel *sl) { struct mac802154_llsec_seclevel *entry; if (llsec_find_seclevel(sec, sl)) return -EEXIST; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->level = *sl; list_add_tail_rcu(&entry->level.list, &sec->table.security_levels); return 0; } int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_seclevel *sl) { struct mac802154_llsec_seclevel *pos; pos = llsec_find_seclevel(sec, sl); if (!pos) return -ENOENT; list_del_rcu(&pos->level.list); kfree_rcu(pos, rcu); return 0; } static int llsec_recover_addr(struct mac802154_llsec *sec, struct ieee802154_addr *addr) { __le16 caddr = sec->params.coord_shortaddr; addr->pan_id = sec->params.pan_id; if (caddr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { return -EINVAL; } else if (caddr == cpu_to_le16(IEEE802154_ADDR_UNDEF)) { addr->extended_addr = sec->params.coord_hwaddr; addr->mode = IEEE802154_ADDR_LONG; } else { addr->short_addr = sec->params.coord_shortaddr; addr->mode = IEEE802154_ADDR_SHORT; } return 0; } static struct mac802154_llsec_key* llsec_lookup_key(struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, const struct ieee802154_addr *addr, struct ieee802154_llsec_key_id *key_id) { struct ieee802154_addr devaddr = *addr; u8 key_id_mode = hdr->sec.key_id_mode; struct 
ieee802154_llsec_key_entry *key_entry; struct mac802154_llsec_key *key; if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT && devaddr.mode == IEEE802154_ADDR_NONE) { if (hdr->fc.type == IEEE802154_FC_TYPE_BEACON) { devaddr.extended_addr = sec->params.coord_hwaddr; devaddr.mode = IEEE802154_ADDR_LONG; } else if (llsec_recover_addr(sec, &devaddr) < 0) { return NULL; } } list_for_each_entry_rcu(key_entry, &sec->table.keys, list) { const struct ieee802154_llsec_key_id *id = &key_entry->id; if (!(key_entry->key->frame_types & BIT(hdr->fc.type))) continue; if (id->mode != key_id_mode) continue; if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT) { if (ieee802154_addr_equal(&devaddr, &id->device_addr)) goto found; } else { if (id->id != hdr->sec.key_id) continue; if ((key_id_mode == IEEE802154_SCF_KEY_INDEX) || (key_id_mode == IEEE802154_SCF_KEY_SHORT_INDEX && id->short_source == hdr->sec.short_src) || (key_id_mode == IEEE802154_SCF_KEY_HW_INDEX && id->extended_source == hdr->sec.extended_src)) goto found; } } return NULL; found: key = container_of(key_entry->key, struct mac802154_llsec_key, key); if (key_id) *key_id = key_entry->id; return llsec_key_get(key); } static void llsec_geniv(u8 iv[16], __le64 addr, const struct ieee802154_sechdr *sec) { __be64 addr_bytes = (__force __be64) swab64((__force u64) addr); __be32 frame_counter = (__force __be32) swab32((__force u32) sec->frame_counter); iv[0] = 1; /* L' = L - 1 = 1 */ memcpy(iv + 1, &addr_bytes, sizeof(addr_bytes)); memcpy(iv + 9, &frame_counter, sizeof(frame_counter)); iv[13] = sec->level; iv[14] = 0; iv[15] = 1; } static int llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key) { u8 iv[16]; struct scatterlist src; SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); int err, datalen; unsigned char *data; llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); /* Compute data payload offset and data length */ data = skb_mac_header(skb) + skb->mac_len; datalen = skb_tail_pointer(skb) - data; sg_init_one(&src, data, datalen); skcipher_request_set_sync_tfm(req, key->tfm0); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &src, &src, datalen, iv); err = crypto_skcipher_encrypt(req); skcipher_request_zero(req); return err; } static struct crypto_aead* llsec_tfm_by_len(struct mac802154_llsec_key *key, int authlen) { int i; for (i = 0; i < ARRAY_SIZE(key->tfm); i++) if (crypto_aead_authsize(key->tfm[i]) == authlen) return key->tfm[i]; BUG(); } static int llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key) { u8 iv[16]; unsigned char *data; int authlen, assoclen, datalen, rc; struct scatterlist sg; struct aead_request *req; authlen = ieee802154_sechdr_authtag_len(&hdr->sec); llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); if (!req) return -ENOMEM; assoclen = skb->mac_len; data = skb_mac_header(skb) + skb->mac_len; datalen = skb_tail_pointer(skb) - data; skb_put(skb, authlen); sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen + authlen); if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) { assoclen += datalen; datalen = 0; } aead_request_set_callback(req, 0, NULL, NULL); aead_request_set_crypt(req, &sg, &sg, datalen, iv); aead_request_set_ad(req, assoclen); rc = crypto_aead_encrypt(req); kfree_sensitive(req); return rc; } static int llsec_do_encrypt(struct sk_buff *skb, const 
struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key) { if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) return llsec_do_encrypt_unauth(skb, sec, hdr, key); else return llsec_do_encrypt_auth(skb, sec, hdr, key); } int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) { struct ieee802154_hdr hdr; int rc, authlen, hlen; struct mac802154_llsec_key *key; u32 frame_ctr; hlen = ieee802154_hdr_pull(skb, &hdr); /* TODO: control frames security support */ if (hlen < 0 || (hdr.fc.type != IEEE802154_FC_TYPE_DATA && hdr.fc.type != IEEE802154_FC_TYPE_BEACON)) return -EINVAL; if (!hdr.fc.security_enabled || (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) { skb_push(skb, hlen); return 0; } authlen = ieee802154_sechdr_authtag_len(&hdr.sec); if (skb->len + hlen + authlen + IEEE802154_MFR_SIZE > IEEE802154_MTU) return -EMSGSIZE; rcu_read_lock(); read_lock_bh(&sec->lock); if (!sec->params.enabled) { rc = -EINVAL; goto fail_read; } key = llsec_lookup_key(sec, &hdr, &hdr.dest, NULL); if (!key) { rc = -ENOKEY; goto fail_read; } read_unlock_bh(&sec->lock); write_lock_bh(&sec->lock); frame_ctr = be32_to_cpu(sec->params.frame_counter); hdr.sec.frame_counter = cpu_to_le32(frame_ctr); if (frame_ctr == 0xFFFFFFFF) { write_unlock_bh(&sec->lock); llsec_key_put(key); rc = -EOVERFLOW; goto fail; } sec->params.frame_counter = cpu_to_be32(frame_ctr + 1); write_unlock_bh(&sec->lock); rcu_read_unlock(); skb->mac_len = ieee802154_hdr_push(skb, &hdr); skb_reset_mac_header(skb); rc = llsec_do_encrypt(skb, sec, &hdr, key); llsec_key_put(key); return rc; fail_read: read_unlock_bh(&sec->lock); fail: rcu_read_unlock(); return rc; } static struct mac802154_llsec_device* llsec_lookup_dev(struct mac802154_llsec *sec, const struct ieee802154_addr *addr) { struct ieee802154_addr devaddr = *addr; struct mac802154_llsec_device *dev = NULL; if (devaddr.mode == IEEE802154_ADDR_NONE && llsec_recover_addr(sec, &devaddr) < 0) return NULL; if (devaddr.mode == IEEE802154_ADDR_SHORT) { u32 key = llsec_dev_hash_short(devaddr.short_addr, devaddr.pan_id); hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) { if (dev->dev.pan_id == devaddr.pan_id && dev->dev.short_addr == devaddr.short_addr) return dev; } } else { u64 key = llsec_dev_hash_long(devaddr.extended_addr); hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) { if (dev->dev.hwaddr == devaddr.extended_addr) return dev; } } return NULL; } static int llsec_lookup_seclevel(const struct mac802154_llsec *sec, u8 frame_type, u8 cmd_frame_id, struct ieee802154_llsec_seclevel *rlevel) { struct ieee802154_llsec_seclevel *level; list_for_each_entry_rcu(level, &sec->table.security_levels, list) { if (level->frame_type == frame_type && (frame_type != IEEE802154_FC_TYPE_MAC_CMD || level->cmd_frame_id == cmd_frame_id)) { *rlevel = *level; return 0; } } return -EINVAL; } static int llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key, __le64 dev_addr) { u8 iv[16]; unsigned char *data; int datalen; struct scatterlist src; SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); int err; llsec_geniv(iv, dev_addr, &hdr->sec); data = skb_mac_header(skb) + skb->mac_len; datalen = skb_tail_pointer(skb) - data; sg_init_one(&src, data, datalen); skcipher_request_set_sync_tfm(req, key->tfm0); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &src, &src, datalen, iv); err = crypto_skcipher_decrypt(req); 
skcipher_request_zero(req); return err; } static int llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key, __le64 dev_addr) { u8 iv[16]; unsigned char *data; int authlen, datalen, assoclen, rc; struct scatterlist sg; struct aead_request *req; authlen = ieee802154_sechdr_authtag_len(&hdr->sec); llsec_geniv(iv, dev_addr, &hdr->sec); req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); if (!req) return -ENOMEM; assoclen = skb->mac_len; data = skb_mac_header(skb) + skb->mac_len; datalen = skb_tail_pointer(skb) - data; sg_init_one(&sg, skb_mac_header(skb), assoclen + datalen); if (!(hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC)) { assoclen += datalen - authlen; datalen = authlen; } aead_request_set_callback(req, 0, NULL, NULL); aead_request_set_crypt(req, &sg, &sg, datalen, iv); aead_request_set_ad(req, assoclen); rc = crypto_aead_decrypt(req); kfree_sensitive(req); skb_trim(skb, skb->len - authlen); return rc; } static int llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key, __le64 dev_addr) { if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr); else return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr); } static int llsec_update_devkey_record(struct mac802154_llsec_device *dev, const struct ieee802154_llsec_key_id *in_key) { struct mac802154_llsec_device_key *devkey; devkey = llsec_devkey_find(dev, in_key); if (!devkey) { struct mac802154_llsec_device_key *next; next = kzalloc(sizeof(*devkey), GFP_ATOMIC); if (!next) return -ENOMEM; next->devkey.key_id = *in_key; spin_lock_bh(&dev->lock); devkey = llsec_devkey_find(dev, in_key); if (!devkey) list_add_rcu(&next->devkey.list, &dev->dev.keys); else kfree_sensitive(next); spin_unlock_bh(&dev->lock); } return 0; } static int llsec_update_devkey_info(struct mac802154_llsec_device *dev, const struct ieee802154_llsec_key_id *in_key, u32 frame_counter) { struct mac802154_llsec_device_key *devkey = NULL; if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RESTRICT) { devkey = llsec_devkey_find(dev, in_key); if (!devkey) return -ENOENT; } if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) { int rc = llsec_update_devkey_record(dev, in_key); if (rc < 0) return rc; } spin_lock_bh(&dev->lock); if ((!devkey && frame_counter < dev->dev.frame_counter) || (devkey && frame_counter < devkey->devkey.frame_counter)) { spin_unlock_bh(&dev->lock); return -EINVAL; } if (devkey) devkey->devkey.frame_counter = frame_counter + 1; else dev->dev.frame_counter = frame_counter + 1; spin_unlock_bh(&dev->lock); return 0; } int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb) { struct ieee802154_hdr hdr; struct mac802154_llsec_key *key; struct ieee802154_llsec_key_id key_id; struct mac802154_llsec_device *dev; struct ieee802154_llsec_seclevel seclevel; int err; __le64 dev_addr; u32 frame_ctr; if (ieee802154_hdr_peek(skb, &hdr) < 0) return -EINVAL; if (!hdr.fc.security_enabled) return 0; if (hdr.fc.version == 0) return -EINVAL; read_lock_bh(&sec->lock); if (!sec->params.enabled) { read_unlock_bh(&sec->lock); return -EINVAL; } read_unlock_bh(&sec->lock); rcu_read_lock(); key = llsec_lookup_key(sec, &hdr, &hdr.source, &key_id); if (!key) { err = -ENOKEY; goto fail; } dev = llsec_lookup_dev(sec, &hdr.source); if (!dev) { err = -EINVAL; goto fail_dev; } if (llsec_lookup_seclevel(sec, hdr.fc.type, 0, 
&seclevel) < 0) { err = -EINVAL; goto fail_dev; } if (!(seclevel.sec_levels & BIT(hdr.sec.level)) && (hdr.sec.level == 0 && seclevel.device_override && !dev->dev.seclevel_exempt)) { err = -EINVAL; goto fail_dev; } frame_ctr = le32_to_cpu(hdr.sec.frame_counter); if (frame_ctr == 0xffffffff) { err = -EOVERFLOW; goto fail_dev; } err = llsec_update_devkey_info(dev, &key_id, frame_ctr); if (err) goto fail_dev; dev_addr = dev->dev.hwaddr; rcu_read_unlock(); err = llsec_do_decrypt(skb, sec, &hdr, key, dev_addr); llsec_key_put(key); return err; fail_dev: llsec_key_put(key); fail: rcu_read_unlock(); return err; }
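The nonce handling above is easy to miss inside the crypto plumbing: llsec_geniv() packs the 8-byte source address, the 4-byte frame counter and the security level into the 16-byte CTR/CCM block, with the address and counter ending up in big-endian byte order and the trailing block counter starting at 1. A standalone sketch of that byte layout (plain userspace C, illustrative only; example_llsec_ctr_block is not a kernel symbol):

#include <stdint.h>

/* Illustrative reconstruction of the counter-block layout built by
 * llsec_geniv(): flags byte, 13-byte 802.15.4 nonce, 2-byte counter.
 */
static void example_llsec_ctr_block(uint8_t iv[16], uint64_t src_addr,
				    uint32_t frame_counter, uint8_t level)
{
	int i;

	iv[0] = 1;				/* flags: L' = L - 1 = 1 */
	for (i = 0; i < 8; i++)			/* source address, big endian */
		iv[1 + i] = src_addr >> (56 - 8 * i);
	for (i = 0; i < 4; i++)			/* frame counter, big endian */
		iv[9 + i] = frame_counter >> (24 - 8 * i);
	iv[13] = level;				/* security level */
	iv[14] = 0;				/* initial block counter = 1 */
	iv[15] = 1;
}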
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Allocate an array of spinlocks to be accessed by a hash. Two arguments
 * indicate the number of elements to allocate in the array. max_size
 * gives the maximum number of elements to allocate. cpu_mult gives
 * the number of locks per CPU to allocate. The size is rounded up
 * to a power of 2 to be suitable as a hash table.
 */
int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
			     size_t max_size, unsigned int cpu_mult, gfp_t gfp,
			     const char *name, struct lock_class_key *key)
{
	spinlock_t *tlocks = NULL;
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	if (cpu_mult) {
		nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
		size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size);
	} else {
		size = max_size;
	}

	if (sizeof(spinlock_t) != 0) {
		tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
		if (!tlocks)
			return -ENOMEM;
		for (i = 0; i < size; i++) {
			spin_lock_init(&tlocks[i]);
			lockdep_init_map(&tlocks[i].dep_map, name, key, 0);
		}
	}

	*locks = tlocks;
	*locks_mask = size - 1;

	return 0;
}
EXPORT_SYMBOL(__alloc_bucket_spinlocks);

void free_bucket_spinlocks(spinlock_t *locks)
{
	kvfree(locks);
}
EXPORT_SYMBOL(free_bucket_spinlocks);
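A sketch of how a caller typically uses this helper: the lock array is sized from max_size and cpu_mult, and locks_mask is the array size minus one, so a hash value can be masked straight into a bucket-lock index. alloc_bucket_spinlocks() is assumed here to be the usual wrapper macro that supplies the lockdep class key; the example_* names are hypothetical.

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static spinlock_t *example_locks;		/* hypothetical bucket-lock array */
static unsigned int example_locks_mask;

static int example_table_init(void)
{
	/* at most 1024 locks, 4 per possible CPU */
	return alloc_bucket_spinlocks(&example_locks, &example_locks_mask,
				      1024, 4, GFP_KERNEL);
}

static void example_table_update(u32 key)
{
	/* mask the hash to select the bucket lock */
	spinlock_t *lock = &example_locks[jhash_1word(key, 0) &
					  example_locks_mask];

	spin_lock(lock);
	/* ... modify the hash bucket protected by this lock ... */
	spin_unlock(lock);
}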
// SPDX-License-Identifier: GPL-2.0+

#include <linux/iosys-map.h>

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_blend.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>

#include "vkms_drv.h"
#include "vkms_formats.h"

static const u32 vkms_formats[] = {
	DRM_FORMAT_ARGB8888,
	DRM_FORMAT_XRGB8888,
	DRM_FORMAT_XRGB16161616,
	DRM_FORMAT_ARGB16161616,
	DRM_FORMAT_RGB565
};

static struct drm_plane_state *
vkms_plane_duplicate_state(struct drm_plane *plane)
{
	struct vkms_plane_state *vkms_state;
	struct vkms_frame_info *frame_info;

	vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL);
	if (!vkms_state)
		return NULL;

	frame_info = kzalloc(sizeof(*frame_info), GFP_KERNEL);
	if (!frame_info) {
		DRM_DEBUG_KMS("Couldn't allocate frame_info\n");
		kfree(vkms_state);
		return NULL;
	}

	vkms_state->frame_info = frame_info;

	__drm_gem_duplicate_shadow_plane_state(plane, &vkms_state->base);

	return &vkms_state->base.base;
}

static void vkms_plane_destroy_state(struct drm_plane *plane,
				     struct drm_plane_state *old_state)
{
	struct vkms_plane_state *vkms_state = to_vkms_plane_state(old_state);
	struct drm_crtc *crtc = vkms_state->base.base.crtc;

	if (crtc && vkms_state->frame_info->fb) {
		/* dropping the reference we acquired in
		 * vkms_primary_plane_update()
		 */
		if (drm_framebuffer_read_refcount(vkms_state->frame_info->fb))
			drm_framebuffer_put(vkms_state->frame_info->fb);
	}

	kfree(vkms_state->frame_info);
	vkms_state->frame_info = NULL;

	__drm_gem_destroy_shadow_plane_state(&vkms_state->base);
	kfree(vkms_state);
}

static void vkms_plane_reset(struct drm_plane *plane)
{
	struct vkms_plane_state *vkms_state;

	if (plane->state) {
		vkms_plane_destroy_state(plane, plane->state);
		plane->state = NULL; /* must be set to NULL here */
	}

	vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL);
	if (!vkms_state) {
		DRM_ERROR("Cannot allocate vkms_plane_state\n");
		return;
	}

	__drm_gem_reset_shadow_plane(plane, &vkms_state->base);
}

static const struct drm_plane_funcs vkms_plane_funcs = {
	.update_plane		= drm_atomic_helper_update_plane,
	.disable_plane		= drm_atomic_helper_disable_plane,
	.reset			= vkms_plane_reset,
	.atomic_duplicate_state = vkms_plane_duplicate_state,
	.atomic_destroy_state	= vkms_plane_destroy_state,
};

static void vkms_plane_atomic_update(struct drm_plane *plane,
				     struct drm_atomic_state *state)
{
	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
									   plane);
	struct vkms_plane_state *vkms_plane_state;
	struct drm_shadow_plane_state *shadow_plane_state;
	struct drm_framebuffer *fb = new_state->fb;
	struct vkms_frame_info *frame_info;
	u32 fmt;

	if (!new_state->crtc || !fb)
		return;

	fmt = fb->format->format;
	vkms_plane_state = to_vkms_plane_state(new_state);
shadow_plane_state = &vkms_plane_state->base; frame_info = vkms_plane_state->frame_info; memcpy(&frame_info->src, &new_state->src, sizeof(struct drm_rect)); memcpy(&frame_info->dst, &new_state->dst, sizeof(struct drm_rect)); frame_info->fb = fb; memcpy(&frame_info->map, &shadow_plane_state->data, sizeof(frame_info->map)); drm_framebuffer_get(frame_info->fb); frame_info->rotation = new_state->rotation; vkms_plane_state->pixel_read_line = get_pixel_read_line_function(fmt); } static int vkms_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_crtc_state *crtc_state; int ret; if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) return 0; crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, DRM_PLANE_NO_SCALING, DRM_PLANE_NO_SCALING, true, true); if (ret != 0) return ret; return 0; } static int vkms_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { struct drm_shadow_plane_state *shadow_plane_state; struct drm_framebuffer *fb = state->fb; int ret; if (!fb) return 0; shadow_plane_state = to_drm_shadow_plane_state(state); ret = drm_gem_plane_helper_prepare_fb(plane, state); if (ret) return ret; return drm_gem_fb_vmap(fb, shadow_plane_state->map, shadow_plane_state->data); } static void vkms_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *state) { struct drm_shadow_plane_state *shadow_plane_state; struct drm_framebuffer *fb = state->fb; if (!fb) return; shadow_plane_state = to_drm_shadow_plane_state(state); drm_gem_fb_vunmap(fb, shadow_plane_state->map); } static const struct drm_plane_helper_funcs vkms_plane_helper_funcs = { .atomic_update = vkms_plane_atomic_update, .atomic_check = vkms_plane_atomic_check, .prepare_fb = vkms_prepare_fb, .cleanup_fb = vkms_cleanup_fb, }; struct vkms_plane *vkms_plane_init(struct vkms_device *vkmsdev, enum drm_plane_type type) { struct drm_device *dev = &vkmsdev->drm; struct vkms_plane *plane; plane = drmm_universal_plane_alloc(dev, struct vkms_plane, base, 0, &vkms_plane_funcs, vkms_formats, ARRAY_SIZE(vkms_formats), NULL, type, NULL); if (IS_ERR(plane)) return plane; drm_plane_helper_add(&plane->base, &vkms_plane_helper_funcs); drm_plane_create_rotation_property(&plane->base, DRM_MODE_ROTATE_0, DRM_MODE_ROTATE_MASK | DRM_MODE_REFLECT_MASK); return plane; }
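One detail worth noting in vkms_plane_atomic_update() above: new_state->src, which is copied into frame_info->src, is a drm_rect in 16.16 fixed-point units, while dst is in whole pixels. A small illustrative helper (hypothetical, not part of vkms) for converting the source rectangle to pixel units:

#include <drm/drm_rect.h>

/* Hypothetical helper: convert a 16.16 fixed-point plane source rect
 * (like frame_info->src above) into integer pixel coordinates.
 */
static void example_src_rect_to_pixels(const struct drm_rect *src,
				       int *x, int *y, int *w, int *h)
{
	*x = src->x1 >> 16;
	*y = src->y1 >> 16;
	*w = drm_rect_width(src) >> 16;
	*h = drm_rect_height(src) >> 16;
}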
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

/* Kernel module implementing an IP set type: the hash:ip type */

#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/tcp.h>

#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>

#define IPSET_TYPE_REV_MIN	0
/*				1	   Counters support */
/*				2	   Comments support */
/*				3	   Forceadd support */
/*				4	   skbinfo support */
/*				5	   bucketsize, initval support */
#define IPSET_TYPE_REV_MAX	6 /* bitmask support */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip");

/* Type specific function prefix */
#define HTYPE		hash_ip
#define IP_SET_HASH_WITH_NETMASK
#define IP_SET_HASH_WITH_BITMASK

/* IPv4 variant */

/* Member elements */
struct hash_ip4_elem {
	/* Zero valued IP addresses cannot be stored */
	__be32 ip;
};

/* Common functions */

static bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
		    const struct hash_ip4_elem *e2,
		    u32 *multi)
{
	return e1->ip == e2->ip;
}

static bool
hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
{
	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}

static void
hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
	next->ip = e->ip;
}

#define MTYPE		hash_ip4
#define HOST_MASK	32
#include "ip_set_hash_gen.h"

static int
hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
	      const struct xt_action_param *par,
	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
	const struct hash_ip4 *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ip4_elem e = { 0 };
	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
	__be32 ip;

	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
	ip &= h->bitmask.ip;
	if (ip == 0)
		return -EINVAL;

	e.ip = ip;
	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}

static int
hash_ip4_uadt(struct ip_set
*set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ip &= ntohl(h->bitmask.ip); e.ip = htonl(ip); if (e.ip == 0) return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) { if (ip_to == 0) return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ip4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; if (ip == 0) return 0; ret = 0; } return ret; } /* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; /* Common functions */ static bool hash_ip6_data_equal(const struct hash_ip6_elem *ip1, const struct hash_ip6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6); } static bool hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ip6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); if (ipv6_addr_any(&e.ip.in6)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP])) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; 
nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); if (ipv6_addr_any(&e.ip.in6)) return -IPSET_ERR_HASH_ELEM; ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } static struct ip_set_type hash_ip_type __read_mostly = { .name = "hash:ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ip_init(void) { return ip_set_type_register(&hash_ip_type); } static void __exit hash_ip_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ip_type); } module_init(hash_ip_init); module_exit(hash_ip_fini);
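A minimal standalone sketch of the range expansion performed by hash_ip4_uadt() above: given a host-order address and a CIDR prefix it derives the from/to bounds (as the ip_set_mask_from_to() step in the code does) and then walks the range in blocks of hosts = 1 << (32 - netmask) addresses, which is what the kernel expression 2 << (32 - netmask - 1) evaluates to. The names below are illustrative userspace C, not kernel API; the bitmask masking, zero-address check and IPSET_MAX_RANGE cap from the real code are left out.

/*
 * Illustrative sketch only: mirrors the range walk in hash_ip4_uadt()
 * without using any kernel APIs. All names here are made up.
 */
#include <stdint.h>
#include <stdio.h>

/* Host-order network mask for a prefix length, e.g. /29 -> 0xfffffff8. */
static uint32_t hostmask(uint8_t cidr)
{
	return cidr ? 0xffffffffu << (32 - cidr) : 0;
}

int main(void)
{
	uint32_t ip = 0xc0a80001;	/* 192.168.0.1, host byte order */
	uint8_t cidr = 29;		/* request: add 192.168.0.0/29 */
	uint8_t netmask = 32;		/* set created without a netmask option */

	/* Derive the range bounds from address and prefix. */
	uint32_t from = ip & hostmask(cidr);
	uint32_t to = from | ~hostmask(cidr);
	/* Step size: one element per netmask-sized block. */
	uint32_t hosts = (netmask == 32) ? 1 : 1u << (32 - netmask);

	/* The cur >= from test stops the loop if the counter wraps past 0xffffffff. */
	for (uint32_t cur = from; cur <= to && cur >= from; cur += hosts)
		printf("add %u.%u.%u.%u\n", cur >> 24, (cur >> 16) & 0xff,
		       (cur >> 8) & 0xff, cur & 0xff);

	return 0;
}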
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_PLANE_H__ #define __DRM_PLANE_H__ #include <linux/list.h> #include <linux/ctype.h> #include <linux/kmsg_dump.h> #include <drm/drm_mode_object.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_rect.h> #include <drm/drm_modeset_lock.h> #include <drm/drm_util.h> struct drm_crtc; struct drm_plane_size_hint; struct drm_printer; struct drm_modeset_acquire_ctx; enum drm_scaling_filter { DRM_SCALING_FILTER_DEFAULT, DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; /** * struct drm_plane_state - mutable plane state * * Please note that the destination coordinates @crtc_x, @crtc_y, @crtc_h and * @crtc_w and the source coordinates @src_x, @src_y, @src_h and @src_w are the * raw coordinates provided by userspace. Drivers should use * drm_atomic_helper_check_plane_state() and only use the derived rectangles in * @src and @dst to program the hardware. */ struct drm_plane_state { /** @plane: backpointer to the plane */ struct drm_plane *plane; /** * @crtc: * * Currently bound CRTC, NULL if disabled. Do not write this directly, * use drm_atomic_set_crtc_for_plane() */ struct drm_crtc *crtc; /** * @fb: * * Currently bound framebuffer. Do not write this directly, use * drm_atomic_set_fb_for_plane() */ struct drm_framebuffer *fb; /** * @fence: * * Optional fence to wait for before scanning out @fb. The core atomic * code will set this when userspace is using explicit fencing. Do not * write this field directly for a driver's implicit fence. * * Drivers should store any implicit fence in this from their * &drm_plane_helper_funcs.prepare_fb callback. See * drm_gem_plane_helper_prepare_fb() for a suitable helper. */ struct dma_fence *fence; /** * @crtc_x: * * Left position of visible portion of plane on crtc, signed dest * location allows it to be partially off screen. */ int32_t crtc_x; /** * @crtc_y: * * Upper position of visible portion of plane on crtc, signed dest * location allows it to be partially off screen.
*/ int32_t crtc_y; /** @crtc_w: width of visible portion of plane on crtc */ /** @crtc_h: height of visible portion of plane on crtc */ uint32_t crtc_w, crtc_h; /** * @src_x: left position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_x; /** * @src_y: upper position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_y; /** @src_w: width of visible portion of plane (in 16.16) */ /** @src_h: height of visible portion of plane (in 16.16) */ uint32_t src_h, src_w; /** @hotspot_x: x offset to mouse cursor hotspot */ /** @hotspot_y: y offset to mouse cursor hotspot */ int32_t hotspot_x, hotspot_y; /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as * completely opaque. See drm_plane_create_alpha_property() for more * details. */ u16 alpha; /** * @pixel_blend_mode: * The alpha blending equation selection, describing how the pixels from * the current plane are composited with the background. Value can be * one of DRM_MODE_BLEND_* */ uint16_t pixel_blend_mode; /** * @rotation: * Rotation of the plane. See drm_plane_create_rotation_property() for * more details. */ unsigned int rotation; /** * @zpos: * Priority of the given plane on crtc (optional). * * User-space may set mutable zpos properties so that multiple active * planes on the same CRTC have identical zpos values. This is a * user-space bug, but drivers can solve the conflict by comparing the * plane object IDs; the plane with a higher ID is stacked on top of a * plane with a lower ID. * * See drm_plane_create_zpos_property() and * drm_plane_create_zpos_immutable_property() for more details. */ unsigned int zpos; /** * @normalized_zpos: * Normalized value of zpos: unique, range from 0 to N-1 where N is the * number of active planes for given crtc. Note that the driver must set * &drm_mode_config.normalize_zpos or call drm_atomic_normalize_zpos() to * update this before it can be trusted. */ unsigned int normalized_zpos; /** * @color_encoding: * * Color encoding for non RGB formats */ enum drm_color_encoding color_encoding; /** * @color_range: * * Color range for non RGB formats */ enum drm_color_range color_range; /** * @fb_damage_clips: * * Blob representing damage (area in plane framebuffer that changed * since last plane update) as an array of &drm_mode_rect in framebuffer * coodinates of the attached framebuffer. Note that unlike plane src, * damage clips are not in 16.16 fixed point. * * See drm_plane_get_damage_clips() and * drm_plane_get_damage_clips_count() for accessing these. */ struct drm_property_blob *fb_damage_clips; /** * @ignore_damage_clips: * * Set by drivers to indicate the drm_atomic_helper_damage_iter_init() * helper that the @fb_damage_clips blob property should be ignored. * * See :ref:`damage_tracking_properties` for more information. */ bool ignore_damage_clips; /** * @src: * * source coordinates of the plane (in 16.16). * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ /** * @dst: * * clipped destination coordinates of the plane. * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ struct drm_rect src, dst; /** * @visible: * * Visibility of the plane. 
This can be false even if fb!=NULL and * crtc!=NULL, due to clipping. */ bool visible; /** * @scaling_filter: * * Scaling filter to be applied */ enum drm_scaling_filter scaling_filter; /** * @commit: Tracks the pending commit to prevent use-after-free conditions, * and for async plane updates. * * May be NULL. */ struct drm_crtc_commit *commit; /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; /** * @color_mgmt_changed: Color management properties have changed. Used * by the atomic helpers and drivers to steer the atomic commit control * flow. */ bool color_mgmt_changed : 1; }; static inline struct drm_rect drm_plane_state_src(const struct drm_plane_state *state) { struct drm_rect src = { .x1 = state->src_x, .y1 = state->src_y, .x2 = state->src_x + state->src_w, .y2 = state->src_y + state->src_h, }; return src; } static inline struct drm_rect drm_plane_state_dest(const struct drm_plane_state *state) { struct drm_rect dest = { .x1 = state->crtc_x, .y1 = state->crtc_y, .x2 = state->crtc_x + state->crtc_w, .y2 = state->crtc_y + state->crtc_h, }; return dest; } /** * struct drm_plane_funcs - driver plane control functions */ struct drm_plane_funcs { /** * @update_plane: * * This is the legacy entry point to enable and configure the plane for * the given CRTC and framebuffer. It is never called to disable the * plane, i.e. the passed-in crtc and fb paramters are never NULL. * * The source rectangle in frame buffer memory coordinates is given by * the src_x, src_y, src_w and src_h parameters (as 16.16 fixed point * values). Devices that don't support subpixel plane coordinates can * ignore the fractional part. * * The destination rectangle in CRTC coordinates is given by the * crtc_x, crtc_y, crtc_w and crtc_h parameters (as integer values). * Devices scale the source rectangle to the destination rectangle. If * scaling is not supported, and the source rectangle size doesn't match * the destination rectangle size, the driver must return a * -<errorname>EINVAL</errorname> error. * * Drivers implementing atomic modeset should use * drm_atomic_helper_update_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, struct drm_modeset_acquire_ctx *ctx); /** * @disable_plane: * * This is the legacy entry point to disable the plane. The DRM core * calls this method in response to a DRM_IOCTL_MODE_SETPLANE IOCTL call * with the frame buffer ID set to 0. Disabled planes must not be * processed by the CRTC. * * Drivers implementing atomic modeset should use * drm_atomic_helper_disable_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*disable_plane)(struct drm_plane *plane, struct drm_modeset_acquire_ctx *ctx); /** * @destroy: * * Clean up plane resources. This is only called at driver unload time * through drm_mode_config_cleanup() since a plane cannot be hotplugged * in DRM. */ void (*destroy)(struct drm_plane *plane); /** * @reset: * * Reset plane hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. * * Atomic drivers can use drm_atomic_helper_plane_reset() to reset * atomic state using this hook. 
*/ void (*reset)(struct drm_plane *plane); /** * @set_property: * * This is the legacy entry point to update a property attached to the * plane. * * This callback is optional if the driver does not support any legacy * driver-private properties. For atomic drivers it is not used because * property handling is done entirely in the DRM core. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); /** * @atomic_duplicate_state: * * Duplicate the current atomic state for this plane and return it. * The core and helpers guarantee that any atomic state duplicated with * this hook and still owned by the caller (i.e. not transferred to the * driver by calling &drm_mode_config_funcs.atomic_commit) will be * cleaned up by calling the @atomic_destroy_state hook in this * structure. * * This callback is mandatory for atomic drivers. * * Atomic drivers which don't subclass &struct drm_plane_state should use * drm_atomic_helper_plane_duplicate_state(). Drivers that subclass the * state structure to extend it with driver-private state should use * __drm_atomic_helper_plane_duplicate_state() to make sure shared state is * duplicated in a consistent fashion across drivers. * * It is an error to call this hook before &drm_plane.state has been * initialized correctly. * * NOTE: * * If the duplicate state references refcounted resources this hook must * acquire a reference for each of them. The driver must release these * references again in @atomic_destroy_state. * * RETURNS: * * Duplicated atomic state or NULL when the allocation failed. */ struct drm_plane_state *(*atomic_duplicate_state)(struct drm_plane *plane); /** * @atomic_destroy_state: * * Destroy a state duplicated with @atomic_duplicate_state and release * or unreference all resources it references * * This callback is mandatory for atomic drivers. */ void (*atomic_destroy_state)(struct drm_plane *plane, struct drm_plane_state *state); /** * @atomic_set_property: * * Decode a driver-private property value and store the decoded value * into the passed-in state structure. Since the atomic core decodes all * standardized properties (even for extensions beyond the core set of * properties which might not be implemented by all drivers) this * requires drivers to subclass the state structure. * * Such driver-private properties should really only be implemented for * truly hardware/vendor specific state. Instead it is preferred to * standardize atomic extension and decode the properties used to expose * such an extension in the core. * * Do not call this function directly, use * drm_atomic_plane_set_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * NOTE: * * This function is called in the state assembly phase of atomic * modesets, which can be aborted for any reason (including on * userspace's request to just check whether a configuration would be * possible). Drivers MUST NOT touch any persistent state (hardware or * software) or data structures except the passed in @state parameter. * * Also since userspace controls in which order properties are set this * function must not do any input validation (since the state update is * incomplete and hence likely inconsistent). Instead any such input * validation must be done in the various atomic_check callbacks. 
* * RETURNS: * * 0 if the property has been found, -EINVAL if the property isn't * implemented by the driver (which shouldn't ever happen, the core only * asks for properties attached to this plane). No other validation is * allowed by the driver. The core already checks that the property * value is within the range (integer, valid enum value, ...) the driver * set when registering the property. */ int (*atomic_set_property)(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, uint64_t val); /** * @atomic_get_property: * * Reads out the decoded driver-private property. This is used to * implement the GETPLANE IOCTL. * * Do not call this function directly, use * drm_atomic_plane_get_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * RETURNS: * * 0 on success, -EINVAL if the property isn't implemented by the * driver (which should never happen, the core only asks for * properties attached to this plane). */ int (*atomic_get_property)(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, uint64_t *val); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the plane like debugfs interfaces. * It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_plane *plane); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the plane from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. */ void (*early_unregister)(struct drm_plane *plane); /** * @atomic_print_state: * * If driver subclasses &struct drm_plane_state, it should implement * this optional hook for printing additional driver specific state. * * Do not call this directly, use drm_atomic_plane_print_state() * instead. */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_plane_state *state); /** * @format_mod_supported: * * This optional hook is used for the DRM to determine if the given * format/modifier combination is valid for the plane. This allows the * DRM to generate the correct format bitmask (which formats apply to * which modifier), and to validate modifiers at atomic_check time. * * If not present, then any modifier in the plane's modifier * list is allowed with any of the plane's formats. * * Returns: * * True if the given modifier is valid for that format on the plane. * False otherwise. */ bool (*format_mod_supported)(struct drm_plane *plane, uint32_t format, uint64_t modifier); }; /** * enum drm_plane_type - uapi plane type enumeration * * For historical reasons not all planes are made the same. This enumeration is * used to tell the different types of planes apart to implement the different * uapi semantics for them. For userspace which is universal plane aware and * which is using that atomic IOCTL there's no difference between these planes * (beyong what the driver and hardware can support of course). * * For compatibility with legacy userspace, only overlay planes are made * available to userspace by default. 
Userspace clients may set the * &DRM_CLIENT_CAP_UNIVERSAL_PLANES client capability bit to indicate that they * wish to receive a universal plane list containing all plane types. See also * drm_for_each_legacy_plane(). * * In addition to setting each plane's type, drivers need to setup the * &drm_crtc.primary and optionally &drm_crtc.cursor pointers for legacy * IOCTLs. See drm_crtc_init_with_planes(). * * WARNING: The values of this enum is UABI since they're exposed in the "type" * property. */ enum drm_plane_type { /** * @DRM_PLANE_TYPE_OVERLAY: * * Overlay planes represent all non-primary, non-cursor planes. Some * drivers refer to these types of planes as "sprites" internally. */ DRM_PLANE_TYPE_OVERLAY, /** * @DRM_PLANE_TYPE_PRIMARY: * * A primary plane attached to a CRTC is the most likely to be able to * light up the CRTC when no scaling/cropping is used and the plane * covers the whole CRTC. */ DRM_PLANE_TYPE_PRIMARY, /** * @DRM_PLANE_TYPE_CURSOR: * * A cursor plane attached to a CRTC is more likely to be able to be * enabled when no scaling/cropping is used and the framebuffer has the * size indicated by &drm_mode_config.cursor_width and * &drm_mode_config.cursor_height. Additionally, if the driver doesn't * support modifiers, the framebuffer should have a linear layout. */ DRM_PLANE_TYPE_CURSOR, }; /** * struct drm_plane - central DRM plane control structure * * Planes represent the scanout hardware of a display block. They receive their * input data from a &drm_framebuffer and feed it to a &drm_crtc. Planes control * the color conversion, see `Plane Composition Properties`_ for more details, * and are also involved in the color conversion of input pixels, see `Color * Management Properties`_ for details on that. */ struct drm_plane { /** @dev: DRM device this plane belongs to */ struct drm_device *dev; /** * @head: * * List of all planes on @dev, linked from &drm_mode_config.plane_list. * Invariant over the lifetime of @dev and therefore does not need * locking. */ struct list_head head; /** @name: human readable name, can be overwritten by the driver */ char *name; /** * @mutex: * * Protects modeset plane state, together with the &drm_crtc.mutex of * CRTC this plane is linked to (when active, getting activated or * getting disabled). * * For atomic drivers specifically this protects @state. */ struct drm_modeset_lock mutex; /** @base: base mode object */ struct drm_mode_object base; /** * @possible_crtcs: pipes this plane can be bound to constructed from * drm_crtc_mask() */ uint32_t possible_crtcs; /** @format_types: array of formats supported by this plane */ uint32_t *format_types; /** @format_count: Size of the array pointed at by @format_types. */ unsigned int format_count; /** * @format_default: driver hasn't supplied supported formats for the * plane. Used by the non-atomic driver compatibility wrapper only. */ bool format_default; /** @modifiers: array of modifiers supported by this plane */ uint64_t *modifiers; /** @modifier_count: Size of the array pointed at by @modifier_count. */ unsigned int modifier_count; /** * @crtc: * * Currently bound CRTC, only meaningful for non-atomic drivers. For * atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.crtc. */ struct drm_crtc *crtc; /** * @fb: * * Currently bound framebuffer, only meaningful for non-atomic drivers. * For atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.fb. 
*/ struct drm_framebuffer *fb; /** * @old_fb: * * Temporary tracking of the old fb while a modeset is ongoing. Only * used by non-atomic drivers, forced to be NULL for atomic drivers. */ struct drm_framebuffer *old_fb; /** @funcs: plane control functions */ const struct drm_plane_funcs *funcs; /** @properties: property tracking for this plane */ struct drm_object_properties properties; /** @type: Type of plane, see &enum drm_plane_type for details. */ enum drm_plane_type type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the plane. */ unsigned index; /** @helper_private: mid-layer private data */ const struct drm_plane_helper_funcs *helper_private; /** * @state: * * Current atomic state for this plane. * * This is protected by @mutex. Note that nonblocking atomic commits * access the current plane state without taking locks. Either by going * through the &struct drm_atomic_state pointers, see * for_each_oldnew_plane_in_state(), for_each_old_plane_in_state() and * for_each_new_plane_in_state(). Or through careful ordering of atomic * commit operations as implemented in the atomic helpers, see * &struct drm_crtc_commit. */ struct drm_plane_state *state; /** * @alpha_property: * Optional alpha property for this plane. See * drm_plane_create_alpha_property(). */ struct drm_property *alpha_property; /** * @zpos_property: * Optional zpos property for this plane. See * drm_plane_create_zpos_property(). */ struct drm_property *zpos_property; /** * @rotation_property: * Optional rotation property for this plane. See * drm_plane_create_rotation_property(). */ struct drm_property *rotation_property; /** * @blend_mode_property: * Optional "pixel blend mode" enum property for this plane. * Blend mode property represents the alpha blending equation selection, * describing how the pixels from the current plane are composited with * the background. */ struct drm_property *blend_mode_property; /** * @color_encoding_property: * * Optional "COLOR_ENCODING" enum property for specifying * color encoding for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_encoding_property; /** * @color_range_property: * * Optional "COLOR_RANGE" enum property for specifying * color range for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_range_property; /** * @scaling_filter_property: property to apply a particular filter while * scaling. */ struct drm_property *scaling_filter_property; /** * @hotspot_x_property: property to set mouse hotspot x offset. */ struct drm_property *hotspot_x_property; /** * @hotspot_y_property: property to set mouse hotspot y offset. 
*/ struct drm_property *hotspot_y_property; /** * @kmsg_panic: Used to register a panic notifier for this plane */ struct kmsg_dumper kmsg_panic; }; #define obj_to_plane(x) container_of(x, struct drm_plane, base) __printf(9, 10) int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...); void drm_plane_cleanup(struct drm_plane *plane); __printf(10, 11) void *__drmm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drmm_universal_plane_alloc - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. Cleanup is * automatically handled through registering drm_plane_cleanup() with * drmm_add_action(). * * The @drm_plane_funcs.destroy hook must be NULL. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drmm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drmm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) __printf(10, 11) void *__drm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drm_universal_plane_alloc() - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. The caller * is responsible for releasing the allocated memory with kfree(). * * Drivers are encouraged to use drmm_universal_plane_alloc() instead. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. 
The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) /** * drm_plane_index - find the index of a registered plane * @plane: plane to find index for * * Given a registered plane, return the index of that plane within a DRM * device's list of planes. */ static inline unsigned int drm_plane_index(const struct drm_plane *plane) { return plane->index; } /** * drm_plane_mask - find the mask of a registered plane * @plane: plane to find mask for */ static inline u32 drm_plane_mask(const struct drm_plane *plane) { return 1 << drm_plane_index(plane); } struct drm_plane * drm_plane_from_index(struct drm_device *dev, int idx); void drm_plane_force_disable(struct drm_plane *plane); int drm_mode_plane_set_obj_prop(struct drm_plane *plane, struct drm_property *property, uint64_t value); /** * drm_plane_find - find a &drm_plane * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: plane id * * Returns the plane with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_PLANE); return mo ? obj_to_plane(mo) : NULL; } /** * drm_for_each_plane_mask - iterate over planes specified by bitmask * @plane: the loop cursor * @dev: the DRM device * @plane_mask: bitmask of plane indices * * Iterate over all planes specified by bitmask. */ #define drm_for_each_plane_mask(plane, dev, plane_mask) \ list_for_each_entry((plane), &(dev)->mode_config.plane_list, head) \ for_each_if ((plane_mask) & drm_plane_mask(plane)) /** * drm_for_each_legacy_plane - iterate over all planes for legacy userspace * @plane: the loop cursor * @dev: the DRM device * * Iterate over all legacy planes of @dev, excluding primary and cursor planes. * This is useful for implementing userspace apis when userspace is not * universal plane aware. See also &enum drm_plane_type. */ #define drm_for_each_legacy_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) \ for_each_if (plane->type == DRM_PLANE_TYPE_OVERLAY) /** * drm_for_each_plane - iterate over all planes * @plane: the loop cursor * @dev: the DRM device * * Iterate over all planes of @dev, include primary and cursor planes. */ #define drm_for_each_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) bool drm_plane_has_format(struct drm_plane *plane, u32 format, u64 modifier); bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); unsigned int drm_plane_get_damage_clips_count(const struct drm_plane_state *state); struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); int drm_plane_add_size_hints_property(struct drm_plane *plane, const struct drm_plane_size_hint *hints, int num_hints); #endif
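A minimal sketch of how a driver might use the interfaces above to create a primary plane: a &drm_plane_funcs table wired to the atomic helpers named in the kerneldoc, plus a managed allocation through drmm_universal_plane_alloc(). The foo_* names, the single XRGB8888 format and the single-CRTC mask are assumptions made for the example, not an existing driver; drm_atomic_helper_plane_destroy_state() is assumed as the usual counterpart of the duplicate helper mentioned above.

/*
 * Hedged sketch: a hypothetical driver ("foo") creating one primary plane.
 * The foo_* types are illustrative; the helpers come from the atomic helper
 * library referenced in the documentation above.
 */
#include <linux/bits.h>
#include <linux/err.h>
#include <linux/kernel.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_device.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_plane.h>

struct foo_device {
	struct drm_device drm;		/* assumed embedded DRM device */
};

struct foo_plane {
	struct drm_plane base;		/* drm_plane embedded as "base" */
};

static const struct drm_plane_funcs foo_plane_funcs = {
	.update_plane		= drm_atomic_helper_update_plane,
	.disable_plane		= drm_atomic_helper_disable_plane,
	.reset			= drm_atomic_helper_plane_reset,
	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
	/* .destroy stays NULL: cleanup is managed by drmm_universal_plane_alloc() */
};

static const uint32_t foo_plane_formats[] = {
	DRM_FORMAT_XRGB8888,
};

static int foo_create_primary_plane(struct foo_device *foo)
{
	struct foo_plane *plane;

	/* NULL format_modifiers: only the linear modifier is advertised. */
	plane = drmm_universal_plane_alloc(&foo->drm, struct foo_plane, base,
					   BIT(0),	/* possible_crtcs: CRTC 0 */
					   &foo_plane_funcs,
					   foo_plane_formats,
					   ARRAY_SIZE(foo_plane_formats),
					   NULL,
					   DRM_PLANE_TYPE_PRIMARY, NULL);
	if (IS_ERR(plane))
		return PTR_ERR(plane);

	return 0;
}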
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_GPIO_DRIVER_H #define __LINUX_GPIO_DRIVER_H
#include <linux/bits.h> #include <linux/cleanup.h> #include <linux/err.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/irqhandler.h> #include <linux/lockdep.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/pinctrl/pinctrl.h> #include <linux/property.h> #include <linux/spinlock_types.h> #include <linux/types.h> #ifdef CONFIG_GENERIC_MSI_IRQ #include <asm/msi.h> #endif struct device; struct irq_chip; struct irq_data; struct module; struct of_phandle_args; struct pinctrl_dev; struct seq_file; struct gpio_chip; struct gpio_desc; struct gpio_device; enum gpio_lookup_flags; enum gpiod_flags; union gpio_irq_fwspec { struct irq_fwspec fwspec; #ifdef CONFIG_GENERIC_MSI_IRQ msi_alloc_info_t msiinfo; #endif }; #define GPIO_LINE_DIRECTION_IN 1 #define GPIO_LINE_DIRECTION_OUT 0 /** * struct gpio_irq_chip - GPIO interrupt controller */ struct gpio_irq_chip { /** * @chip: * * GPIO IRQ chip implementation, provided by GPIO driver. */ struct irq_chip *chip; /** * @domain: * * Interrupt translation domain; responsible for mapping between GPIO * hwirq number and Linux IRQ number. */ struct irq_domain *domain; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * @fwnode: * * Firmware node corresponding to this gpiochip/irqchip, necessary * for hierarchical irqdomain support. */ struct fwnode_handle *fwnode; /** * @parent_domain: * * If non-NULL, will be set as the parent of this GPIO interrupt * controller's IRQ domain to establish a hierarchical interrupt * domain. The presence of this will activate the hierarchical * interrupt support. */ struct irq_domain *parent_domain; /** * @child_to_parent_hwirq: * * This callback translates a child hardware IRQ offset to a parent * hardware IRQ offset on a hierarchical interrupt chip. The child * hardware IRQs correspond to the GPIO index 0..ngpio-1 (see the * ngpio field of struct gpio_chip) and the corresponding parent * hardware IRQ and type (such as IRQ_TYPE_*) shall be returned by * the driver. The driver can calculate this from an offset or using * a lookup table or whatever method is best for this chip. Return * 0 on successful translation in the driver. * * If some ranges of hardware IRQs do not have a corresponding parent * HWIRQ, return -EINVAL, but also make sure to fill in @valid_mask and * @need_valid_mask to make these GPIO lines unavailable for * translation. */ int (*child_to_parent_hwirq)(struct gpio_chip *gc, unsigned int child_hwirq, unsigned int child_type, unsigned int *parent_hwirq, unsigned int *parent_type); /** * @populate_parent_alloc_arg : * * This optional callback allocates and populates the specific struct * for the parent's IRQ domain. If this is not specified, then * &gpiochip_populate_parent_fwspec_twocell will be used. A four-cell * variant named &gpiochip_populate_parent_fwspec_fourcell is also * available. */ int (*populate_parent_alloc_arg)(struct gpio_chip *gc, union gpio_irq_fwspec *fwspec, unsigned int parent_hwirq, unsigned int parent_type); /** * @child_offset_to_irq: * * This optional callback is used to translate the child's GPIO line * offset on the GPIO chip to an IRQ number for the GPIO to_irq() * callback. If this is not specified, then a default callback will be * provided that returns the line offset. */ unsigned int (*child_offset_to_irq)(struct gpio_chip *gc, unsigned int pin); /** * @child_irq_domain_ops: * * The IRQ domain operations that will be used for this GPIO IRQ * chip. 
If no operations are provided, then default callbacks will * be populated to setup the IRQ hierarchy. Some drivers need to * supply their own translate function. */ struct irq_domain_ops child_irq_domain_ops; #endif /** * @handler: * * The IRQ handler to use (often a predefined IRQ core function) for * GPIO IRQs, provided by GPIO driver. */ irq_flow_handler_t handler; /** * @default_type: * * Default IRQ triggering type applied during GPIO driver * initialization, provided by GPIO driver. */ unsigned int default_type; /** * @lock_key: * * Per GPIO IRQ chip lockdep class for IRQ lock. */ struct lock_class_key *lock_key; /** * @request_key: * * Per GPIO IRQ chip lockdep class for IRQ request. */ struct lock_class_key *request_key; /** * @parent_handler: * * The interrupt handler for the GPIO chip's parent interrupts, may be * NULL if the parent interrupts are nested rather than cascaded. */ irq_flow_handler_t parent_handler; union { /** * @parent_handler_data: * * If @per_parent_data is false, @parent_handler_data is a * single pointer used as the data associated with every * parent interrupt. */ void *parent_handler_data; /** * @parent_handler_data_array: * * If @per_parent_data is true, @parent_handler_data_array is * an array of @num_parents pointers, and is used to associate * different data for each parent. This cannot be NULL if * @per_parent_data is true. */ void **parent_handler_data_array; }; /** * @num_parents: * * The number of interrupt parents of a GPIO chip. */ unsigned int num_parents; /** * @parents: * * A list of interrupt parents of a GPIO chip. This is owned by the * driver, so the core will only reference this list, not modify it. */ unsigned int *parents; /** * @map: * * A list of interrupt parents for each line of a GPIO chip. */ unsigned int *map; /** * @threaded: * * True if set the interrupt handling uses nested threads. */ bool threaded; /** * @per_parent_data: * * True if parent_handler_data_array describes a @num_parents * sized array to be used as parent data. */ bool per_parent_data; /** * @initialized: * * Flag to track GPIO chip irq member's initialization. * This flag will make sure GPIO chip irq members are not used * before they are initialized. */ bool initialized; /** * @domain_is_allocated_externally: * * True it the irq_domain was allocated outside of gpiolib, in which * case gpiolib won't free the irq_domain itself. */ bool domain_is_allocated_externally; /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when * a particular driver wants to clear IRQ related registers * in order to avoid undesired events. */ int (*init_hw)(struct gpio_chip *gc); /** * @init_valid_mask: optional routine to initialize @valid_mask, to be * used if not all GPIO lines are valid interrupts. Sometimes some * lines just cannot fire interrupts, and this routine, when defined, * is passed a bitmap in "valid_mask" and it will have ngpios * bits from 0..(ngpios-1) set to "1" as in valid. The callback can * then directly set some bits to "0" if they cannot be used for * interrupts. */ void (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); /** * @valid_mask: * * If not %NULL, holds bitmask of GPIOs which are valid to be included * in IRQ domain of the chip. */ unsigned long *valid_mask; /** * @first: * * Required for static IRQ allocation. If set, irq_domain_add_simple() * will allocate and map all IRQs during initialization. 
*/ unsigned int first; /** * @irq_enable: * * Store old irq_chip irq_enable callback */ void (*irq_enable)(struct irq_data *data); /** * @irq_disable: * * Store old irq_chip irq_disable callback */ void (*irq_disable)(struct irq_data *data); /** * @irq_unmask: * * Store old irq_chip irq_unmask callback */ void (*irq_unmask)(struct irq_data *data); /** * @irq_mask: * * Store old irq_chip irq_mask callback */ void (*irq_mask)(struct irq_data *data); }; /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part * number or the name of the SoC IP-block implementing it. * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep * @free: optional hook for chip-specific deactivation, such as * disabling module power and clock; may sleep * @get_direction: returns direction for signal "offset", 0=out, 1=in, * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), * or negative error. It is recommended to always implement this * function, even on input-only or output-only gpio chips. * @direction_input: configures signal "offset" as input, returns 0 on success * or a negative error number. This can be omitted on input-only or * output-only gpio chips. * @direction_output: configures signal "offset" as output, returns 0 on * success or a negative error number. This can be omitted on input-only * or output-only gpio chips. * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. * @to_irq: optional hook supporting non-static gpiod_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra * state (such as pullup/pulldown configuration). * @init_valid_mask: optional routine to initialize @valid_mask, to be used if * not all GPIOs are valid. * @add_pin_ranges: optional routine to initialize pin ranges, to be used when * requires special mapping of the pins that provides GPIO functionality. * It is called after adding GPIO chip and before adding IRQ chip. * @en_hw_timestamp: Dependent on GPIO chip, an optional routine to * enable hardware timestamp. * @dis_hw_timestamp: Dependent on GPIO chip, an optional routine to * disable hardware timestamp. * @base: identifies the first GPIO number handled by this chip; * or, if negative during registration, requests dynamic ID allocation. * DEPRECATION: providing anything non-negative and nailing the base * offset of GPIO chips is deprecated. Please pass -1 as base to * let gpiolib select the chip base in all possible cases. We want to * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO * handled is (base + ngpio - 1). 
* @offset: when multiple gpio chips belong to the same device this * can be used as offset within the device so friendly names can * be properly assigned. * @names: if set, must be an array of strings to use as alternative * names for the GPIOs in this chip. Any entry in the array * may be NULL if there is no alias for the GPIO, however the * array must be @ngpio entries long. * @can_sleep: flag must be set iff get()/set() methods sleep, as they * must while accessing GPIO expander chips over I2C or SPI. This * implies that if the chip supports IRQs, these IRQs need to be threaded * as the chip access may sleep when e.g. reading out the IRQ status * registers. * @read_reg: reader function for generic GPIO * @write_reg: writer function for generic GPIO * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the * generic GPIO core. It is for internal housekeeping only. * @reg_dat: data (in) register for generic GPIO * @reg_set: output set register (out=high) for generic GPIO * @reg_clr: output clear register (out=low) for generic GPIO * @reg_dir_out: direction out setting register for generic GPIO * @reg_dir_in: direction in setting register for generic GPIO * @bgpio_dir_unreadable: indicates that the direction register(s) cannot * be read and we need to rely on out internal state tracking. * @bgpio_bits: number of register bits used for a generic GPIO i.e. * <register width> * 8 * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep * shadowed and real data registers writes together. * @bgpio_data: shadowed data register for generic GPIO to clear/set bits * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. A "1" in this word means the line is set as * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so * they can all be accessed through a common programming interface. * Example sources would be SOC controllers, FPGAs, multifunction * chips, dedicated GPIO expanders, and so on. * * Each chip controls a number of signals, identified in method calls * by "offset" values in the range 0..(@ngpio - 1). When those signals * are referenced through calls like gpio_get_value(gpio), the offset * is calculated by subtracting @base from the gpio number. 
*/ struct gpio_chip { const char *label; struct gpio_device *gpiodev; struct device *parent; struct fwnode_handle *fwnode; struct module *owner; int (*request)(struct gpio_chip *gc, unsigned int offset); void (*free)(struct gpio_chip *gc, unsigned int offset); int (*get_direction)(struct gpio_chip *gc, unsigned int offset); int (*direction_input)(struct gpio_chip *gc, unsigned int offset); int (*direction_output)(struct gpio_chip *gc, unsigned int offset, int value); int (*get)(struct gpio_chip *gc, unsigned int offset); int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); void (*set)(struct gpio_chip *gc, unsigned int offset, int value); void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); int (*to_irq)(struct gpio_chip *gc, unsigned int offset); void (*dbg_show)(struct seq_file *s, struct gpio_chip *gc); int (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); int (*add_pin_ranges)(struct gpio_chip *gc); int (*en_hw_timestamp)(struct gpio_chip *gc, u32 offset, unsigned long flags); int (*dis_hw_timestamp)(struct gpio_chip *gc, u32 offset, unsigned long flags); int base; u16 ngpio; u16 offset; const char *const *names; bool can_sleep; #if IS_ENABLED(CONFIG_GPIO_GENERIC) unsigned long (*read_reg)(void __iomem *reg); void (*write_reg)(void __iomem *reg, unsigned long data); bool be_bits; void __iomem *reg_dat; void __iomem *reg_set; void __iomem *reg_clr; void __iomem *reg_dir_out; void __iomem *reg_dir_in; bool bgpio_dir_unreadable; int bgpio_bits; raw_spinlock_t bgpio_lock; unsigned long bgpio_data; unsigned long bgpio_dir; #endif /* CONFIG_GPIO_GENERIC */ #ifdef CONFIG_GPIOLIB_IRQCHIP /* * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ /** * @irq: * * Integrates interrupt chip functionality with the GPIO chip. Can be * used to handle IRQs for most practical cases. */ struct gpio_irq_chip irq; #endif /* CONFIG_GPIOLIB_IRQCHIP */ /** * @valid_mask: * * If not %NULL, holds bitmask of GPIOs which are valid to be used * from the chip. */ unsigned long *valid_mask; #if defined(CONFIG_OF_GPIO) /* * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in * the device tree automatically may have an OF translation */ /** * @of_gpio_n_cells: * * Number of cells used to form the GPIO specifier. */ unsigned int of_gpio_n_cells; /** * @of_xlate: * * Callback to translate a device tree GPIO specifier into a chip- * relative GPIO number and flags. */ int (*of_xlate)(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); #endif /* CONFIG_OF_GPIO */ }; char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset); struct _gpiochip_for_each_data { const char **label; unsigned int *i; }; DEFINE_CLASS(_gpiochip_for_each_data, struct _gpiochip_for_each_data, if (*_T.label) kfree(*_T.label), ({ struct _gpiochip_for_each_data _data = { label, i }; *_data.i = 0; _data; }), const char **label, int *i) /** * for_each_hwgpio - Iterates over all GPIOs for given chip. * @_chip: Chip to iterate over. * @_i: Loop counter. * @_label: Place to store the address of the label if the GPIO is requested. * Set to NULL for unused GPIOs. 
*/ #define for_each_hwgpio(_chip, _i, _label) \ for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ *_data.i < _chip->ngpio; \ (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ if (IS_ERR(*_data.label = \ gpiochip_dup_line_label(_chip, *_data.i))) {} \ else /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range * @_chip: the chip to query * @_i: loop variable * @_base: first GPIO in the range * @_size: amount of GPIOs to check starting from @base * @_label: label of current GPIO */ #define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ *_data.i < _size; \ (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ if ((*_data.label = \ gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ else if (IS_ERR(*_data.label)) {} \ else /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip * @gc: the chip to register, with gc->base initialized * @data: driver-private data associated with this chip * * Context: potentially before irqs will work * * When gpiochip_add_data() is called very early during boot, so that GPIOs * can be freely used, the gc->parent device must be registered before * the gpio framework's arch_initcall(). Otherwise sysfs initialization * for GPIOs will fail rudely. * * gpiochip_add_data() must only be called after gpiolib initialization, * i.e. after core_initcall(). * * If gc->base is negative, this requests dynamic assignment of * a range of valid GPIOs. * * Returns: * A negative errno if the chip can't be registered, such as because the * gc->base is invalid or already associated with a different chip. * Otherwise it returns zero as a success code. 
*/ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(gc, data) ({ \ static struct lock_class_key lock_key; \ static struct lock_class_key request_key; \ gpiochip_add_data_with_key(gc, data, &lock_key, \ &request_key); \ }) #define devm_gpiochip_add_data(dev, gc, data) ({ \ static struct lock_class_key lock_key; \ static struct lock_class_key request_key; \ devm_gpiochip_add_data_with_key(dev, gc, data, &lock_key, \ &request_key); \ }) #else #define gpiochip_add_data(gc, data) gpiochip_add_data_with_key(gc, data, NULL, NULL) #define devm_gpiochip_add_data(dev, gc, data) \ devm_gpiochip_add_data_with_key(dev, gc, data, NULL, NULL) #endif /* CONFIG_LOCKDEP */ void gpiochip_remove(struct gpio_chip *gc); int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); DEFINE_FREE(gpio_device_put, struct gpio_device *, if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)) struct device *gpio_device_to_device(struct gpio_device *gdev); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset); /* irq_data versions of the above */ int gpiochip_irq_reqres(struct irq_data *data); void gpiochip_irq_relres(struct irq_data *data); /* Paste this in your irq_chip structure */ #define GPIOCHIP_IRQ_RESOURCE_HELPERS \ .irq_request_resources = gpiochip_irq_reqres, \ .irq_release_resources = gpiochip_irq_relres static inline void gpio_irq_chip_set_chip(struct gpio_irq_chip *girq, const struct irq_chip *chip) { /* Yes, dropping const is ugly, but it isn't like we have a choice */ girq->chip = (struct irq_chip *)chip; } /* Line status inquiry for drivers */ bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); /* Sleep persistence inquiry for drivers */ bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *gc); struct bgpio_pdata { const char *label; int base; int ngpio; }; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ int bgpio_init(struct gpio_chip *gc, struct device *dev, unsigned long sz, void __iomem *dat, void __iomem *set, void __iomem *clr, void __iomem *dirout, void __iomem *dirin, unsigned long flags); #define BGPIOF_BIG_ENDIAN BIT(0) #define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ #define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) #define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #define BGPIOF_NO_OUTPUT BIT(5) /* only 
input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); #else #include <asm/bug.h> static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) { WARN_ON(1); return -EINVAL; } #endif int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config); /** * struct gpio_pin_range - pin range controlled by a gpio chip * @node: list for maintaining set of pin ranges, used internally * @pctldev: pinctrl device which handles corresponding pins * @range: actual range of pins controlled by a gpio controller */ struct gpio_pin_range { struct list_head node; struct pinctrl_dev *pctldev; struct pinctrl_gpio_range range; }; #ifdef CONFIG_PINCTRL int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins); int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); void gpiochip_remove_pin_ranges(struct gpio_chip *gc); #else /* ! CONFIG_PINCTRL */ static inline int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins) { return 0; } static inline int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group) { return 0; } static inline void gpiochip_remove_pin_ranges(struct gpio_chip *gc) { } #endif /* CONFIG_PINCTRL */ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, unsigned int hwnum, const char *label, enum gpio_lookup_flags lflags, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); /* struct gpio_device getters */ int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); struct gpio_device *gpio_device_find_by_label(const char *label); struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); #else /* CONFIG_GPIOLIB */ #include <asm/bug.h> static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { /* GPIO can never have been requested */ WARN_ON(1); return ERR_PTR(-ENODEV); } static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) { WARN_ON(1); return ERR_PTR(-ENODEV); } static inline int gpio_device_get_base(struct gpio_device *gdev) { WARN_ON(1); return -ENODEV; } static inline const char *gpio_device_get_label(struct gpio_device *gdev) { WARN_ON(1); return NULL; } static inline struct gpio_device *gpio_device_find_by_label(const char *label) { WARN_ON(1); return NULL; } static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) { WARN_ON(1); return NULL; } static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, 
unsigned int offset) { WARN_ON(1); return -EINVAL; } static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset) { WARN_ON(1); } #endif /* CONFIG_GPIOLIB */ #define for_each_gpiochip_node(dev, child) \ device_for_each_child_node(dev, child) \ if (!fwnode_property_present(child, "gpio-controller")) {} else static inline unsigned int gpiochip_node_count(struct device *dev) { struct fwnode_handle *child; unsigned int count = 0; for_each_gpiochip_node(dev, child) count++; return count; } static inline struct fwnode_handle *gpiochip_node_get_first(struct device *dev) { struct fwnode_handle *fwnode; for_each_gpiochip_node(dev, fwnode) return fwnode; return NULL; } #endif /* __LINUX_GPIO_DRIVER_H */
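The driver-facing half of this header is the struct gpio_chip registration path. The sketch below shows how a hypothetical memory-mapped controller might fill in the callbacks and register with devm_gpiochip_add_data(); the register layout, the mmio_gpio_* names and the platform-device glue are illustrative assumptions, not part of the header above.

/*
 * Minimal sketch of a gpio_chip provider, assuming a made-up MMIO layout:
 * DATA register at 0x0, DIRECTION register at 0x4 (1 = output).
 * Not a real driver; only the gpiolib calls are taken from this header.
 */
#include <linux/gpio/driver.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct mmio_gpio {
	struct gpio_chip chip;
	void __iomem *base;
};

static int mmio_gpio_get(struct gpio_chip *gc, unsigned int offset)
{
	struct mmio_gpio *mg = gpiochip_get_data(gc);

	return !!(readl(mg->base + 0x0) & BIT(offset));
}

static void mmio_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
{
	struct mmio_gpio *mg = gpiochip_get_data(gc);
	u32 val = readl(mg->base + 0x0);

	if (value)
		val |= BIT(offset);
	else
		val &= ~BIT(offset);
	writel(val, mg->base + 0x0);
}

static int mmio_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
{
	struct mmio_gpio *mg = gpiochip_get_data(gc);

	writel(readl(mg->base + 0x4) & ~BIT(offset), mg->base + 0x4);
	return 0;
}

static int mmio_gpio_dir_out(struct gpio_chip *gc, unsigned int offset,
			     int value)
{
	struct mmio_gpio *mg = gpiochip_get_data(gc);

	mmio_gpio_set(gc, offset, value);
	writel(readl(mg->base + 0x4) | BIT(offset), mg->base + 0x4);
	return 0;
}

static int mmio_gpio_probe(struct platform_device *pdev)
{
	struct mmio_gpio *mg;

	mg = devm_kzalloc(&pdev->dev, sizeof(*mg), GFP_KERNEL);
	if (!mg)
		return -ENOMEM;

	mg->base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(mg->base))
		return PTR_ERR(mg->base);

	mg->chip.label = "mmio-gpio-example";
	mg->chip.parent = &pdev->dev;
	mg->chip.owner = THIS_MODULE;
	mg->chip.base = -1;		/* request dynamic GPIO numbering */
	mg->chip.ngpio = 32;
	mg->chip.get = mmio_gpio_get;
	mg->chip.set = mmio_gpio_set;
	mg->chip.direction_input = mmio_gpio_dir_in;
	mg->chip.direction_output = mmio_gpio_dir_out;

	/* The data pointer comes back via gpiochip_get_data() in callbacks. */
	return devm_gpiochip_add_data(&pdev->dev, &mg->chip, mg);
}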
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_gred.c Generic Random Early Detection queue. * * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 * * 991129: - Bug fix with grio mode * - a better sing. AvgQ mode with Grio(WRED) * - A finer grained VQ dequeue based on suggestion * from Ren Liu * - More error checks * * For all the glorious comments look at include/net/red.h */ #include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> #include <net/red.h> #define GRED_DEF_PRIO (MAX_DPs / 2) #define GRED_VQ_MASK (MAX_DPs - 1) #define GRED_VQ_RED_FLAGS (TC_RED_ECN | TC_RED_HARDDROP) struct gred_sched_data; struct gred_sched; struct gred_sched_data { u32 limit; /* HARD maximal queue length */ u32 DP; /* the drop parameters */ u32 red_flags; /* virtualQ version of red_flags */ u64 bytesin; /* bytes seen on virtualQ so far*/ u32 packetsin; /* packets seen on virtualQ so far*/ u32 backlog; /* bytes on the virtualQ */ u8 prio; /* the prio of this vq */ struct red_parms parms; struct red_vars vars; struct red_stats stats; }; enum { GRED_WRED_MODE = 1, GRED_RIO_MODE, }; struct gred_sched { struct gred_sched_data *tab[MAX_DPs]; unsigned long flags; u32 red_flags; u32 DPs; u32 def; struct red_vars wred_set; struct tc_gred_qopt_offload *opt; }; static inline int gred_wred_mode(struct gred_sched *table) { return test_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_enable_wred_mode(struct gred_sched *table) { __set_bit(GRED_WRED_MODE, &table->flags); } static inline void gred_disable_wred_mode(struct gred_sched *table) { __clear_bit(GRED_WRED_MODE, &table->flags); } static inline int gred_rio_mode(struct gred_sched *table) { return test_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_enable_rio_mode(struct gred_sched *table) { __set_bit(GRED_RIO_MODE, &table->flags); } static inline void gred_disable_rio_mode(struct gred_sched *table) { __clear_bit(GRED_RIO_MODE, &table->flags); } static inline int gred_wred_mode_check(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; /* Really ugly O(n^2) but shouldn't be necessary too frequent. 
*/ for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; int n; if (q == NULL) continue; for (n = i + 1; n < table->DPs; n++) if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } return 0; } static inline unsigned int gred_backlog(struct gred_sched *table, struct gred_sched_data *q, struct Qdisc *sch) { if (gred_wred_mode(table)) return sch->qstats.backlog; else return q->backlog; } static inline u16 tc_index_to_dp(struct sk_buff *skb) { return skb->tc_index & GRED_VQ_MASK; } static inline void gred_load_wred_set(const struct gred_sched *table, struct gred_sched_data *q) { q->vars.qavg = table->wred_set.qavg; q->vars.qidlestart = table->wred_set.qidlestart; } static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; table->wred_set.qidlestart = q->vars.qidlestart; } static int gred_use_ecn(struct gred_sched_data *q) { return q->red_flags & TC_RED_ECN; } static int gred_use_harddrop(struct gred_sched_data *q) { return q->red_flags & TC_RED_HARDDROP; } static bool gred_per_vq_red_flags_used(struct gred_sched *table) { unsigned int i; /* Local per-vq flags couldn't have been set unless global are 0 */ if (table->red_flags) return false; for (i = 0; i < MAX_DPs; i++) if (table->tab[i] && table->tab[i]->red_flags) return true; return false; } static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; struct gred_sched *t = qdisc_priv(sch); unsigned long qavg = 0; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { dp = t->def; q = t->tab[dp]; if (!q) { /* Pass through packets not assigned to a DP * if no default DP has been configured. This * allows for DP flows to be left untouched. */ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; } /* fix tc_index? 
--could be controversial but needed for requeueing */ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; for (i = 0; i < t->DPs; i++) { if (t->tab[i] && t->tab[i]->prio < q->prio && !red_is_idling(&t->tab[i]->vars)) qavg += t->tab[i]->vars.qavg; } } q->packetsin++; q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); q->vars.qavg = red_calc_qavg(&q->parms, &q->vars, gred_backlog(t, q, sch)); if (red_is_idling(&q->vars)) red_end_of_idle_period(&q->vars); if (gred_wred_mode(t)) gred_store_wred_set(t, q); switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) { case RED_DONT_MARK: break; case RED_PROB_MARK: qdisc_qstats_overlimit(sch); if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.prob_drop++; goto congestion_drop; } q->stats.prob_mark++; break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); if (gred_use_harddrop(q) || !gred_use_ecn(q) || !INET_ECN_set_ce(skb)) { q->stats.forced_drop++; goto congestion_drop; } q->stats.forced_mark++; break; } if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) { q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } q->stats.pdrop++; drop: return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); congestion_drop: qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); return NET_XMIT_CN; } static struct sk_buff *gred_dequeue(struct Qdisc *sch) { struct sk_buff *skb; struct gred_sched *t = qdisc_priv(sch); skb = qdisc_dequeue_head(sch); if (skb) { struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n", tc_index_to_dp(skb)); } else { q->backlog -= qdisc_pkt_len(skb); if (gred_wred_mode(t)) { if (!sch->qstats.backlog) red_start_of_idle_period(&t->wred_set); } else { if (!q->backlog) red_start_of_idle_period(&q->vars); } } return skb; } return NULL; } static void gred_reset(struct Qdisc *sch) { int i; struct gred_sched *t = qdisc_priv(sch); qdisc_reset_queue(sch); for (i = 0; i < t->DPs; i++) { struct gred_sched_data *q = t->tab[i]; if (!q) continue; red_restart(&q->vars); q->backlog = 0; } } static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) { struct gred_sched *table = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_gred_qopt_offload *opt = table->opt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; memset(opt, 0, sizeof(*opt)); opt->command = command; opt->handle = sch->handle; opt->parent = sch->parent; if (command == TC_GRED_REPLACE) { unsigned int i; opt->set.grio_on = gred_rio_mode(table); opt->set.wred_on = gred_wred_mode(table); opt->set.dp_cnt = table->DPs; opt->set.dp_def = table->def; for (i = 0; i < table->DPs; i++) { struct gred_sched_data *q = table->tab[i]; if (!q) continue; opt->set.tab[i].present = true; opt->set.tab[i].limit = q->limit; opt->set.tab[i].prio = q->prio; opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; opt->set.tab[i].is_ecn = gred_use_ecn(q); opt->set.tab[i].is_harddrop = gred_use_harddrop(q); opt->set.tab[i].probability = q->parms.max_P; opt->set.tab[i].backlog = &q->backlog; } opt->set.qstats = &sch->qstats; } dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt); } static int gred_offload_dump_stats(struct Qdisc *sch) { 
struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt_offload *hw_stats; u64 bytes = 0, packets = 0; unsigned int i; int ret; hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL); if (!hw_stats) return -ENOMEM; hw_stats->command = TC_GRED_STATS; hw_stats->handle = sch->handle; hw_stats->parent = sch->parent; for (i = 0; i < MAX_DPs; i++) { gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; } ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); /* Even if driver returns failure adjust the stats - in case offload * ended but driver still wants to adjust the values. */ sch_tree_lock(sch); for (i = 0; i < MAX_DPs; i++) { if (!table->tab[i]) continue; table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } _bstats_update(&sch->bstats, bytes, packets); sch_tree_unlock(sch); kfree(hw_stats); return ret; } static inline void gred_destroy_vq(struct gred_sched_data *q) { kfree(q); } static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_sopt *sopt; bool red_flags_changed; int i; if (!dps) return -EINVAL; sopt = nla_data(dps); if (sopt->DPs > MAX_DPs) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high"); return -EINVAL; } if (sopt->DPs == 0) { NL_SET_ERR_MSG_MOD(extack, "number of virtual queues can't be 0"); return -EINVAL; } if (sopt->def_DP >= sopt->DPs) { NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count"); return -EINVAL; } if (sopt->flags && gred_per_vq_red_flags_used(table)) { NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used"); return -EINVAL; } sch_tree_lock(sch); table->DPs = sopt->DPs; table->def = sopt->def_DP; red_flags_changed = table->red_flags != sopt->flags; table->red_flags = sopt->flags; /* * Every entry point to GRED is synchronized with the above code * and the DP is checked against DPs, i.e. shadowed VQs can no * longer be found so we can unlock right here. 
*/ sch_tree_unlock(sch); if (sopt->grio) { gred_enable_rio_mode(table); gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } else { gred_disable_rio_mode(table); gred_disable_wred_mode(table); } if (red_flags_changed) for (i = 0; i < table->DPs; i++) if (table->tab[i]) table->tab[i]->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } } gred_offload(sch, TC_GRED_REPLACE); return 0; } static inline int gred_change_vq(struct Qdisc *sch, int dp, struct tc_gred_qopt *ctl, int prio, u8 *stab, u32 max_P, struct gred_sched_data **prealloc, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; if (!q) return -ENOMEM; q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS; } q->DP = dp; q->prio = prio; if (ctl->limit > sch->limit) q->limit = sch->limit; else q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, stab, max_P); red_set_vars(&q->vars); return 0; } static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = { [TCA_GRED_VQ_DP] = { .type = NLA_U32 }, [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 }, }; static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = { [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED }, }; static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, [TCA_GRED_LIMIT] = { .type = NLA_U32 }, [TCA_GRED_VQ_LIST] = { .type = NLA_NESTED }, }; static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (tb[TCA_GRED_VQ_FLAGS]) table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); } static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs) { const struct nlattr *attr; int rem; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: gred_vq_apply(table, attr); break; } } } static int gred_vq_validate(struct gred_sched *table, u32 cdp, const struct nlattr *entry, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; int err; u32 dp; err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, extack); if (err < 0) return err; if (!tb[TCA_GRED_VQ_DP]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified"); return -EINVAL; } dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); if (dp >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds"); return -EINVAL; } if (dp != cdp && !table->tab[dp]) { NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated"); return -EINVAL; } if (tb[TCA_GRED_VQ_FLAGS]) { u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]); if (table->red_flags && table->red_flags != red_flags) { NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual 
queue RED flags when per-Qdisc flags are used"); return -EINVAL; } if (red_flags & ~GRED_VQ_RED_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "invalid RED flags specified"); return -EINVAL; } } return 0; } static int gred_vqs_validate(struct gred_sched *table, u32 cdp, struct nlattr *vqs, struct netlink_ext_ack *extack) { const struct nlattr *attr; int rem, err; err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, gred_vqe_policy, extack); if (err < 0) return err; nla_for_each_nested(attr, vqs, rem) { switch (nla_type(attr)) { case TCA_GRED_VQ_ENTRY: err = gred_vq_validate(table, cdp, attr, extack); if (err) return err; break; default: NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes"); return -EINVAL; } } if (rem > 0) { NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list"); return -EINVAL; } return 0; } static int gred_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt *ctl; struct nlattr *tb[TCA_GRED_MAX + 1]; int err, prio = GRED_DEF_PRIO; u8 *stab; u32 max_P; struct gred_sched_data *prealloc; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } if (tb[TCA_GRED_PARMS] == NULL || tb[TCA_GRED_STAB] == NULL || tb[TCA_GRED_LIMIT] != NULL) { NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time"); return -EINVAL; } max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0); ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); if (ctl->DP >= table->DPs) { NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count"); return -EINVAL; } if (tb[TCA_GRED_VQ_LIST]) { err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST], extack); if (err) return err; } if (gred_rio_mode(table)) { if (ctl->prio == 0) { int def_prio = GRED_DEF_PRIO; if (table->tab[table->def]) def_prio = table->tab[table->def]->prio; printk(KERN_DEBUG "GRED: DP %u does not have a prio " "setting default to %d\n", ctl->DP, def_prio); prio = def_prio; } else prio = ctl->prio; } prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); sch_tree_lock(sch); err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc, extack); if (err < 0) goto err_unlock_free; if (tb[TCA_GRED_VQ_LIST]) gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]); if (gred_rio_mode(table)) { gred_disable_wred_mode(table); if (gred_wred_mode_check(sch)) gred_enable_wred_mode(table); } sch_tree_unlock(sch); kfree(prealloc); gred_offload(sch, TC_GRED_REPLACE); return 0; err_unlock_free: sch_tree_unlock(sch); kfree(prealloc); return err; } static int gred_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *tb[TCA_GRED_MAX + 1]; int err; if (!opt) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) return err; if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) { NL_SET_ERR_MSG_MOD(extack, "virtual queue configuration can't be specified at initialization time"); return -EINVAL; } if (tb[TCA_GRED_LIMIT]) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); else sch->limit = qdisc_dev(sch)->tx_queue_len * psched_mtu(qdisc_dev(sch)); if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) { table->opt = 
kzalloc(sizeof(*table->opt), GFP_KERNEL); if (!table->opt) return -ENOMEM; } return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); } static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) { struct gred_sched *table = qdisc_priv(sch); struct nlattr *parms, *vqs, *opts = NULL; int i; u32 max_p[MAX_DPs]; struct tc_gred_sopt sopt = { .DPs = table->DPs, .def_DP = table->def, .grio = gred_rio_mode(table), .flags = table->red_flags, }; if (gred_offload_dump_stats(sch)) goto nla_put_failure; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; max_p[i] = q ? q->parms.max_P : 0; } if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) goto nla_put_failure; /* Old style all-in-one dump of VQs */ parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; unsigned long qavg; memset(&opt, 0, sizeof(opt)); if (!q) { /* hack -- fix at some point with proper message This is how we indicate to tc that there is no VQ at this DP */ opt.DP = MAX_DPs + i; goto append_opt; } opt.limit = q->limit; opt.DP = q->DP; opt.backlog = gred_backlog(table, q, sch); opt.prio = q->prio; opt.qth_min = q->parms.qth_min >> q->parms.Wlog; opt.qth_max = q->parms.qth_max >> q->parms.Wlog; opt.Wlog = q->parms.Wlog; opt.Plog = q->parms.Plog; opt.Scell_log = q->parms.Scell_log; opt.early = q->stats.prob_drop; opt.forced = q->stats.forced_drop; opt.pdrop = q->stats.pdrop; opt.packets = q->packetsin; opt.bytesin = q->bytesin; if (gred_wred_mode(table)) gred_load_wred_set(table, q); qavg = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg >> q->parms.Wlog); opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) goto nla_put_failure; } nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct nlattr *vq; if (!q) continue; vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags)) goto nla_put_failure; /* Stats */ if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin, TCA_GRED_VQ_PAD)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG, gred_backlog(table, q, sch))) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP, q->stats.prob_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK, q->stats.prob_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP, q->stats.forced_drop)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK, q->stats.forced_mark)) goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) goto nla_put_failure; nla_nest_end(skb, vq); } nla_nest_end(skb, vqs); return nla_nest_end(skb, opts); nla_put_failure: nla_nest_cancel(skb, opts); return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); int i; 
for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .id = "gred", .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, .init = gred_init, .reset = gred_reset, .destroy = gred_destroy, .change = gred_change, .dump = gred_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("gred"); static int __init gred_module_init(void) { return register_qdisc(&gred_qdisc_ops); } static void __exit gred_module_exit(void) { unregister_qdisc(&gred_qdisc_ops); } module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic Random Early Detection qdisc");
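The enqueue path above selects a virtual queue (DP) from the low bits of skb->tc_index and falls back to the default DP when that slot is out of range or unconfigured. The standalone userspace model below illustrates just that selection step; MAX_DPS, struct fake_vq and pick_vq() are inventions for this sketch, not part of sch_gred.c.

/*
 * Userspace model of GRED virtual-queue selection (illustrative only):
 * the low bits of tc_index pick the DP, unknown DPs fall back to the
 * default, and a missing default means "pass the packet through".
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_DPS	16
#define VQ_MASK	(MAX_DPS - 1)

struct fake_vq { bool configured; int prio; };

static int pick_vq(const struct fake_vq *tab, unsigned int ndps,
		   unsigned int def_dp, uint16_t tc_index)
{
	unsigned int dp = tc_index & VQ_MASK;	/* like tc_index_to_dp() */

	if (dp >= ndps || !tab[dp].configured)
		dp = def_dp;			/* fall back to default DP */

	return tab[dp].configured ? (int)dp : -1; /* -1: pass through */
}

int main(void)
{
	struct fake_vq tab[MAX_DPS] = {
		[0] = { .configured = true, .prio = 1 },
		[2] = { .configured = true, .prio = 2 },
	};

	printf("tc_index 0x0002 -> DP %d\n", pick_vq(tab, 3, 0, 0x0002));
	printf("tc_index 0x0005 -> DP %d\n", pick_vq(tab, 3, 0, 0x0005));
	return 0;
}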
1 1 1 8 1 1 3 2 4 1 4 4 1 1 1 1 1 1 1 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2012 Siemens AG * * Written by: * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <net/netlink.h> #include <net/nl802154.h> #include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include <net/route.h> #include <net/cfg802154.h> #include "ieee802154_i.h" #include "cfg.h" static void ieee802154_tasklet_handler(struct tasklet_struct *t) { struct ieee802154_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue))) { switch (skb->pkt_type) { case IEEE802154_RX_MSG: /* Clear skb->pkt_type in order to not confuse kernel * netstack. */ skb->pkt_type = 0; ieee802154_rx(local, skb); break; default: WARN(1, "mac802154: Packet is of unknown type %d\n", skb->pkt_type); kfree_skb(skb); break; } } } struct ieee802154_hw * ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) { struct wpan_phy *phy; struct ieee802154_local *local; size_t priv_size; if (WARN_ON(!ops || !(ops->xmit_async || ops->xmit_sync) || !ops->ed || !ops->start || !ops->stop || !ops->set_channel)) return NULL; /* Ensure 32-byte alignment of our private data and hw private data. * We use the wpan_phy priv data for both our ieee802154_local and for * the driver's private data * * in memory it'll be like this: * * +-------------------------+ * | struct wpan_phy | * +-------------------------+ * | struct ieee802154_local | * +-------------------------+ * | driver's private data | * +-------------------------+ * * Due to ieee802154 layer isn't aware of driver and MAC structures, * so lets align them here. 
*/ priv_size = ALIGN(sizeof(*local), NETDEV_ALIGN) + priv_data_len; phy = wpan_phy_new(&mac802154_config_ops, priv_size); if (!phy) { pr_err("failure to allocate master IEEE802.15.4 device\n"); return NULL; } phy->privid = mac802154_wpan_phy_privid; local = wpan_phy_priv(phy); local->phy = phy; local->hw.phy = local->phy; local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); local->ops = ops; INIT_LIST_HEAD(&local->interfaces); INIT_LIST_HEAD(&local->rx_beacon_list); INIT_LIST_HEAD(&local->rx_mac_cmd_list); mutex_init(&local->iflist_mtx); tasklet_setup(&local->tasklet, ieee802154_tasklet_handler); skb_queue_head_init(&local->skb_queue); INIT_WORK(&local->sync_tx_work, ieee802154_xmit_sync_worker); INIT_DELAYED_WORK(&local->scan_work, mac802154_scan_worker); INIT_WORK(&local->rx_beacon_work, mac802154_rx_beacon_worker); INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); init_completion(&local->assoc_done); /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; phy->supported.min_maxbe = 3; phy->supported.max_maxbe = 8; phy->supported.min_frame_retries = 0; phy->supported.max_frame_retries = 7; phy->supported.max_csma_backoffs = 5; phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE; /* always supported */ phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE) | BIT(NL802154_IFTYPE_COORD); return &local->hw; } EXPORT_SYMBOL(ieee802154_alloc_hw); void ieee802154_configure_durations(struct wpan_phy *phy, unsigned int page, unsigned int channel) { u32 duration = 0; switch (page) { case 0: if (BIT(channel) & 0x1) /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */ duration = 50 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FE) /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */ duration = 25 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FFF800) /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ duration = 16 * NSEC_PER_USEC; break; case 2: if (BIT(channel) & 0x1) /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */ duration = 40 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FE) /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ duration = 16 * NSEC_PER_USEC; break; case 3: if (BIT(channel) & 0x3FFF) /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */ duration = 6 * NSEC_PER_USEC; break; default: break; } if (!duration) { pr_debug("Unknown PHY symbol duration\n"); return; } phy->symbol_duration = duration; phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; } EXPORT_SYMBOL(ieee802154_configure_durations); void ieee802154_free_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); BUG_ON(!list_empty(&local->interfaces)); mutex_destroy(&local->iflist_mtx); wpan_phy_free(local->phy); } EXPORT_SYMBOL(ieee802154_free_hw); static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) { /* TODO warn on empty symbol_duration * Should be done when all drivers sets this value. 
*/ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / NSEC_PER_USEC; wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / NSEC_PER_USEC; } int ieee802154_register_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); char mac_wq_name[IFNAMSIZ + 10] = {}; struct net_device *dev; int rc = -ENOSYS; local->workqueue = create_singlethread_workqueue(wpan_phy_name(local->phy)); if (!local->workqueue) { rc = -ENOMEM; goto out; } snprintf(mac_wq_name, IFNAMSIZ + 10, "%s-mac-cmds", wpan_phy_name(local->phy)); local->mac_wq = create_singlethread_workqueue(mac_wq_name); if (!local->mac_wq) { rc = -ENOMEM; goto out_wq; } hrtimer_init(&local->ifs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); local->ifs_timer.function = ieee802154_xmit_ifs_timer; wpan_phy_set_dev(local->phy, local->hw.parent); ieee802154_setup_wpan_phy_pib(local->phy); ieee802154_configure_durations(local->phy, local->phy->current_page, local->phy->current_channel); if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) { local->phy->supported.min_csma_backoffs = 4; local->phy->supported.max_csma_backoffs = 4; local->phy->supported.min_maxbe = 5; local->phy->supported.max_maxbe = 5; local->phy->supported.min_minbe = 3; local->phy->supported.max_minbe = 3; } if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) { local->phy->supported.min_frame_retries = 3; local->phy->supported.max_frame_retries = 3; } if (hw->flags & IEEE802154_HW_PROMISCUOUS) local->phy->supported.iftypes |= BIT(NL802154_IFTYPE_MONITOR); rc = wpan_phy_register(local->phy); if (rc < 0) goto out_mac_wq; rtnl_lock(); dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM, NL802154_IFTYPE_NODE, cpu_to_le64(0x0000000000000000ULL)); if (IS_ERR(dev)) { rtnl_unlock(); rc = PTR_ERR(dev); goto out_phy; } rtnl_unlock(); return 0; out_phy: wpan_phy_unregister(local->phy); out_mac_wq: destroy_workqueue(local->mac_wq); out_wq: destroy_workqueue(local->workqueue); out: return rc; } EXPORT_SYMBOL(ieee802154_register_hw); void ieee802154_unregister_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); tasklet_kill(&local->tasklet); flush_workqueue(local->workqueue); rtnl_lock(); ieee802154_remove_interfaces(local); rtnl_unlock(); destroy_workqueue(local->mac_wq); destroy_workqueue(local->workqueue); wpan_phy_unregister(local->phy); } EXPORT_SYMBOL(ieee802154_unregister_hw); static int __init ieee802154_init(void) { return ieee802154_iface_init(); } static void __exit ieee802154_exit(void) { ieee802154_iface_exit(); rcu_barrier(); } subsys_initcall(ieee802154_init); module_exit(ieee802154_exit); MODULE_DESCRIPTION("IEEE 802.15.4 subsystem"); MODULE_LICENSE("GPL v2");
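From a driver's point of view, the API above is used by allocating an ieee802154_hw with room for private data, filling in the mandatory ops (xmit_async or xmit_sync, ed, start, stop, set_channel, per the WARN_ON() in ieee802154_alloc_hw()), and registering it. The sketch below is hypothetical: the fake_radio names are invented and the callback prototypes are assumptions based on include/net/mac802154.h, not definitions from this file.

/* Hypothetical driver-side sketch for ieee802154_alloc_hw()/register_hw(). */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/mac802154.h>

struct fake_radio {
	struct ieee802154_hw *hw;
	/* bus handles, regmap, etc. would live here */
};

static int fake_start(struct ieee802154_hw *hw) { return 0; }
static void fake_stop(struct ieee802154_hw *hw) { }

static int fake_ed(struct ieee802154_hw *hw, u8 *level)
{
	*level = 0;		/* pretend the channel is quiet */
	return 0;
}

static int fake_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
{
	return 0;		/* a real driver programs the transceiver here */
}

static int fake_xmit_async(struct ieee802154_hw *hw, struct sk_buff *skb)
{
	/* A real driver hands the frame to hardware and calls
	 * ieee802154_xmit_complete() from its TX-done path. */
	ieee802154_xmit_complete(hw, skb, false);
	return 0;
}

static const struct ieee802154_ops fake_ops = {
	.owner		= THIS_MODULE,
	.start		= fake_start,
	.stop		= fake_stop,
	.ed		= fake_ed,
	.set_channel	= fake_set_channel,
	.xmit_async	= fake_xmit_async,
};

static int fake_radio_probe(struct device *parent)
{
	struct ieee802154_hw *hw;
	struct fake_radio *priv;
	int ret;

	hw = ieee802154_alloc_hw(sizeof(*priv), &fake_ops);
	if (!hw)
		return -ENOMEM;

	priv = hw->priv;	/* driver area after ieee802154_local */
	priv->hw = hw;
	hw->parent = parent;	/* consumed by wpan_phy_set_dev() */

	ret = ieee802154_register_hw(hw);
	if (ret)
		ieee802154_free_hw(hw);
	return ret;
}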
2609 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 ARM Ltd. */ #ifndef __ASM_MMU_H #define __ASM_MMU_H #include <asm/cputype.h> #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ #define USER_ASID_BIT 48 #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) #ifndef __ASSEMBLY__ #include <linux/refcount.h> #include <asm/cpufeature.h> typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT void *sigpage; #endif refcount_t pinned; void *vdso; unsigned long flags; u8 pkey_allocation_map; } mm_context_t; /* * We use atomic64_read() here because the ASID for an 'mm_struct' can * be reallocated when scheduling one of its threads following a * rollover event (see new_context() and flush_context()). In this case, * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush()) * may use a stale ASID. This is fine in principle as the new ASID is * guaranteed to be clean in the TLB, but the TLBI routines have to take * care to handle the following race: * * CPU 0 CPU 1 CPU 2 * * // ptep_clear_flush(mm) * xchg_relaxed(pte, 0) * DSB ISHST * old = ASID(mm) * | <rollover> * | new = new_context(mm) * \-----------------> atomic_set(mm->context.id, new) * cpu_switch_mm(mm) * // Hardware walk of pte using new ASID * TLBI(old) * * In this scenario, the barrier on CPU 0 and the dependency on CPU 1 * ensure that the page-table walker on CPU 1 *must* see the invalid PTE * written by CPU 0. */ #define ASID(mm) (atomic64_read(&(mm)->context.id) & 0xffff) static inline bool arm64_kernel_unmapped_at_el0(void) { return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0); } extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); /* * This check is triggered during the early boot before the cpufeature * is initialised. Checking the status on the local CPU allows the boot * CPU to detect the need for non-global mappings and thus avoiding a * pagetable re-write after all the CPUs are booted. This check will be * anyway run on individual CPUs, allowing us to get the consistent * state once the SMP CPUs are up and thus make the switch to non-global * mappings if required. */ static inline bool kaslr_requires_kpti(void) { /* * E0PD does a similar job to KPTI so can be used instead * where available. */ if (IS_ENABLED(CONFIG_ARM64_E0PD)) { u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); if (cpuid_feature_extract_unsigned_field(mmfr2, ID_AA64MMFR2_EL1_E0PD_SHIFT)) return false; } /* * Systems affected by Cavium erratum 24756 are incompatible * with KPTI. 
*/ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { extern const struct midr_range cavium_erratum_27456_cpus[]; if (is_midr_in_range_list(read_cpuid_id(), cavium_erratum_27456_cpus)) return false; } return true; } #endif /* !__ASSEMBLY__ */ #endif
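To make the ASID constants above concrete: the 16-bit ASID sits in bits [63:48] of the TTBR/TLBI encoding, which is exactly what ASID(mm) masks out and what TTBR_ASID_MASK covers, and USER_ASID_FLAG flips the lowest bit of that field for the userspace view when the kernel is unmapped at EL0 (an assumption based on how USER_ASID_FLAG is used elsewhere in arch/arm64). The snippet below is a standalone illustration with locally re-declared constants, not kernel code.

/* Standalone illustration of the ASID field layout (compiles in userspace). */
#include <stdint.h>
#include <stdio.h>

#define EX_USER_ASID_BIT	48
#define EX_USER_ASID_FLAG	(1ULL << EX_USER_ASID_BIT)
#define EX_TTBR_ASID_MASK	(0xffffULL << 48)

/* Place a 16-bit ASID into the TTBR ASID field; ASID(mm) does the reverse. */
static uint64_t ttbr_with_asid(uint64_t ttbr, uint64_t asid)
{
	return (ttbr & ~EX_TTBR_ASID_MASK) | ((asid & 0xffff) << 48);
}

int main(void)
{
	uint64_t kernel_view = ttbr_with_asid(0, 0x0042);
	uint64_t user_view = kernel_view | EX_USER_ASID_FLAG;

	printf("kernel ASID field: 0x%016llx\n",
	       (unsigned long long)kernel_view);
	printf("user ASID field:   0x%016llx\n",
	       (unsigned long long)user_view);
	return 0;
}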
23 15 7 3 21 7 7 7 7 20 23 23 23 23 23 47 47 47 47 24 6 6 6 29 29 29 29 17 30 31 2 29 12 12 6 12 12 6 6 6 6 12 12 6 6 12 9 1 9 9 2 9 9 5 9 6 6 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 // SPDX-License-Identifier: GPL-2.0 /* * io_misc.c - fallocate, fpunch, truncate: */ #include "bcachefs.h" #include "alloc_foreground.h" #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" #include "error.h" #include "extents.h" #include "extent_update.h" #include "inode.h" #include "io_misc.h" #include "io_write.h" #include "logged_ops.h" #include "rebalance.h" #include "subvolume.h" /* Overwrites whatever was present with zeroes: */ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, u64 sectors, struct bch_io_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) { struct bch_fs *c = trans->c; struct disk_reservation disk_res = { 0 }; struct closure cl; struct open_buckets open_buckets = { 0 }; struct bkey_s_c k; struct bkey_buf old, new; unsigned sectors_allocated = 0, new_replicas; bool unwritten = opts.nocow && c->sb.version >= bcachefs_metadata_version_unwritten_extents; int ret; bch2_bkey_buf_init(&old); bch2_bkey_buf_init(&new); closure_init_stack(&cl); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) return ret; sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); new_replicas = max(0, (int) opts.data_replicas - (int) bch2_bkey_nr_ptrs_fully_allocated(k)); /* * Get a disk 
reservation before (in the nocow case) calling * into the allocator: */ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); if (unlikely(ret)) goto err_noprint; bch2_bkey_buf_reassemble(&old, c, k); if (!unwritten) { struct bkey_i_reservation *reservation; bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); reservation = bkey_reservation_init(new.k); reservation->k.p = iter->pos; bch2_key_resize(&reservation->k, sectors); reservation->v.nr_replicas = opts.data_replicas; } else { struct bkey_i_extent *e; struct bch_devs_list devs_have; struct write_point *wp; devs_have.nr = 0; bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); e = bkey_extent_init(new.k); e->k.p = iter->pos; ret = bch2_alloc_sectors_start_trans(trans, opts.foreground_target, false, write_point, &devs_have, opts.data_replicas, opts.data_replicas, BCH_WATERMARK_normal, 0, &cl, &wp); if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) ret = -BCH_ERR_transaction_restart_nested; if (ret) goto err; sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); bch2_open_bucket_get(c, wp, &open_buckets); bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); bch2_alloc_sectors_done(c, wp); extent_for_each_ptr(extent_i_to_s(e), ptr) ptr->unwritten = true; } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 0, i_sectors_delta, true); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) { struct printbuf buf = PRINTBUF; bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); } err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); bch2_bkey_buf_exit(&new, c); bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); bch2_wait_on_allocator(c, &cl); } return ret; } /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); struct bpos end_pos = POS(inum.inum, end); struct bkey_s_c k; int ret = 0, ret2 = 0; u32 snapshot; while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; if (ret) ret2 = ret; bch2_trans_begin(trans); ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) continue; bch2_btree_iter_set_snapshot(iter, snapshot); /* * peek_max() doesn't have ideal semantics for extents: */ k = bch2_btree_iter_peek_max(iter, end_pos); if (!k.k) break; ret = bkey_err(k); if (ret) continue; bkey_init(&delete.k); delete.k.p = iter->pos; /* create the biggest key we can */ bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, &disk_res, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); } return ret ?: ret2; } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &iter, inum, end, 
i_sectors_delta); bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; return ret; } /* truncate: */ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size)); } static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, u64 new_i_size, bool warn) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter fpunch_iter; struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents, POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); bch2_trans_iter_exit(trans, &fpunch_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_truncate(trans, op_k, NULL); } int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) { struct bkey_i_logged_op_truncate op; bkey_logged_op_truncate_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.new_i_size = cpu_to_le64(new_i_size); /* * Logged ops aren't atomic w.r.t. 
snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; } /* finsert/fcollapse: */ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset)); prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; int ret; offset <<= 9; len <<= 9; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; if (len > 0) { if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { ret = -EFBIG; goto err; } if (offset >= inode_u.bi_size) { ret = -EINVAL; goto err; } } inode_u.bi_size += len; inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); ret = bch2_inode_write(trans, &iter, &inode_u); err: bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; u32 snapshot; bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error */ ret = lockrestart_do(trans, __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors)); if (ret) return ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); switch (op->v.state) { case LOGGED_OP_FINSERT_start: op->v.state = LOGGED_OP_FINSERT_shift_extents; if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; } else { bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_logged_op_update(trans, &op->k_i)); } fallthrough; case LOGGED_OP_FINSERT_shift_extents: while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); bch2_trans_begin(trans); ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, 
warn_errors); if (ret) goto btree_err; bch2_btree_iter_set_snapshot(&iter, snapshot); bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); k = insert ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; if (!k.k || k.k->p.inode != inum.inum || bkey_le(k.k->p, POS(inum.inum, src_offset))) break; copy = bch2_bkey_make_mut_noupdate(trans, k); if ((ret = PTR_ERR_OR_ZERO(copy))) goto btree_err; if (insert && bkey_lt(bkey_start_pos(k.k), src_pos)) { bch2_cut_front(src_pos, copy); /* Splitting compressed extent? */ bch2_disk_reservation_add(c, &disk_res, copy->k.size * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)), BCH_DISK_RESERVATION_NOFAIL); } bkey_init(&delete.k); delete.k.p = copy->k.p; delete.k.p.snapshot = snapshot; delete.k.size = copy->k.size; copy->k.p.offset += shift; copy->k.p.snapshot = snapshot; op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_disk_reservation_put(c, &disk_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; pos = le64_to_cpu(op->v.pos); } op->v.state = LOGGED_OP_FINSERT_finish; if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } break; case LOGGED_OP_FINSERT_finish: break; } err: bch2_trans_iter_exit(trans, &iter); if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_finsert(trans, op_k, NULL); } int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, u64 offset, u64 len, bool insert, s64 *i_sectors_delta) { struct bkey_i_logged_op_finsert op; s64 shift = insert ? len : -len; bkey_logged_op_finsert_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.dst_offset = cpu_to_le64(offset + shift); op.v.src_offset = cpu_to_le64(offset); op.v.pos = cpu_to_le64(insert ? U64_MAX : offset); /* * Logged ops aren't atomic w.r.t. snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; }
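For reference, below is a minimal sketch of how the entry point above might be driven by a caller such as an fallocate-style path. It is illustrative only and not taken from the bcachefs tree: the wrapper name and its alignment policy are assumptions. What is grounded in the code above is the unit convention: bch2_fcollapse_finsert() takes offsets and lengths in 512-byte sectors, which adjust_i_size() converts to bytes by shifting left by 9.

/*
 * Illustrative sketch (not from the bcachefs tree): how a hypothetical
 * caller might dispatch an insert-range or collapse-range request to
 * bch2_fcollapse_finsert(). Offsets and lengths are passed in 512-byte
 * sectors, matching adjust_i_size() above.
 */
static int example_insert_collapse_range(struct bch_fs *c, subvol_inum inum,
					 u64 byte_offset, u64 byte_len,
					 bool insert)
{
	s64 i_sectors_delta = 0;
	int ret;

	/* Hypothetical policy: require sector-aligned arguments. */
	if ((byte_offset | byte_len) & 511)
		return -EINVAL;

	ret = bch2_fcollapse_finsert(c, inum,
				     byte_offset >> 9, byte_len >> 9,
				     insert, &i_sectors_delta);

	/*
	 * i_sectors_delta reports the change in allocated sectors; a real
	 * caller would fold it back into the in-memory inode accounting.
	 */
	return ret;
}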
// SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2022 Linutronix GmbH, John Ogness // Copyright (C) 2022 Intel, Thomas Gleixner #include <linux/atomic.h> #include <linux/bug.h> #include <linux/console.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/init.h> #include <linux/irqflags.h> #include <linux/kthread.h> #include <linux/minmax.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include "internal.h" #include "printk_ringbuffer.h" /* * Printk console printing implementation for consoles that do not depend * on the legacy style console_lock mechanism. * * The state of the console is maintained in the "nbcon_state" atomic * variable. * * The console is locked when: * * - The 'prio' field contains the priority of the context that owns the * console. Only higher priority contexts are allowed to take over the * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. * * - The 'cpu' field denotes on which CPU the console is locked. It is used * to prevent busy waiting on the same CPU. Also it informs the lock owner * that it has lost the lock in a more complex scenario when the lock was * taken over by a higher priority context, released, and taken on another * CPU with the same priority as the interrupted owner. * * The acquire mechanism uses a few more fields: * * - The 'req_prio' field is used by the handover approach to make the * current owner aware that there is a context with a higher priority * waiting for the friendly handover. * * - The 'unsafe' field allows to take over the console in a safe way in the * middle of emitting a message. The field is set only when accessing some * shared resources or when the console device is manipulated. It can be * cleared, for example, after emitting one character when the console * device is in a consistent state. * * - The 'unsafe_takeover' field is set when a hostile takeover took the * console in an unsafe state. The console will stay in the unsafe state * until re-initialized. * * The acquire mechanism uses three approaches: * * 1) Direct acquire when the console is not owned or is owned by a lower * priority context and is in a safe state. * * 2) Friendly handover mechanism uses a request/grant handshake. It is used * when the current owner has lower priority and the console is in an * unsafe state. * * The requesting context: * * a) Sets its priority into the 'req_prio' field. * * b) Waits (with a timeout) for the owning context to unlock the * console.
* * c) Takes the lock and clears the 'req_prio' field. * * The owning context: * * a) Observes the 'req_prio' field set on exit from the unsafe * console state. * * b) Gives up console ownership by clearing the 'prio' field. * * 3) Unsafe hostile takeover allows to take over the lock even when the * console is an unsafe state. It is used only in panic() by the final * attempt to flush consoles in a try and hope mode. * * Note that separate record buffers are used in panic(). As a result, * the messages can be read and formatted without any risk even after * using the hostile takeover in unsafe state. * * The release function simply clears the 'prio' field. * * All operations on @console::nbcon_state are atomic cmpxchg based to * handle concurrency. * * The acquire/release functions implement only minimal policies: * * - Preference for higher priority contexts. * - Protection of the panic CPU. * * All other policy decisions must be made at the call sites: * * - What is marked as an unsafe section. * - Whether to spin-wait if there is already an owner and the console is * in an unsafe state. * - Whether to attempt an unsafe hostile takeover. * * The design allows to implement the well known: * * acquire() * output_one_printk_record() * release() * * The output of one printk record might be interrupted with a higher priority * context. The new owner is supposed to reprint the entire interrupted record * from scratch. */ /** * nbcon_state_set - Helper function to set the console state * @con: Console to update * @new: The new state to write * * Only to be used when the console is not yet or no longer visible in the * system. Otherwise use nbcon_state_try_cmpxchg(). */ static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) { atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); } /** * nbcon_state_read - Helper function to read the console state * @con: Console to read * @state: The state to store the result */ static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) { state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); } /** * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state * @con: Console to update * @cur: Old/expected state * @new: New state * * Return: True on success. False on fail and @cur is updated. */ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, struct nbcon_state *new) { return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } /** * nbcon_seq_read - Read the current console sequence * @con: Console to read the sequence of * * Return: Sequence number of the next record to print on @con. */ u64 nbcon_seq_read(struct console *con) { unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); return __ulseq_to_u64seq(prb, nbcon_seq); } /** * nbcon_seq_force - Force console sequence to a specific value * @con: Console to work on * @seq: Sequence number value to set * * Only to be used during init (before registration) or in extreme situations * (such as panic with CONSOLE_REPLAY_ALL). */ void nbcon_seq_force(struct console *con, u64 seq) { /* * If the specified record no longer exists, the oldest available record * is chosen. This is especially important on 32bit systems because only * the lower 32 bits of the sequence number are stored. The upper 32 bits * are derived from the sequence numbers available in the ringbuffer. 
*/ u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); } /** * nbcon_seq_try_update - Try to update the console sequence number * @ctxt: Pointer to an acquire context that contains * all information about the acquire mode * @new_seq: The new sequence number to set * * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to * the 64bit value). This could be a different value than @new_seq if * nbcon_seq_force() was used or the current context no longer owns the * console. In the later case, it will stop printing anyway. */ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); struct console *con = ctxt->console; if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, __u64seq_to_ulseq(new_seq))) { ctxt->seq = new_seq; } else { ctxt->seq = nbcon_seq_read(con); } } /** * nbcon_context_try_acquire_direct - Try to acquire directly * @ctxt: The context of the caller * @cur: The current console state * * Acquire the console when it is released. Also acquire the console when * the current owner has a lower priority and the console is in a safe state. * * Return: 0 on success. Otherwise, an error code on failure. Also @cur * is updated to the latest state when failed to modify it. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU. * Or the current owner or waiter has the same or higher * priority. No acquire method can be successful in * this case. * * -EBUSY: The current owner has a lower priority but the console * in an unsafe state. The caller should try using * the handover acquire method. */ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; do { /* * Panic does not imply that the console is owned. However, it * is critical that non-panic CPUs during panic are unable to * acquire ownership in order to satisfy the assumptions of * nbcon_waiter_matches(). In particular, the assumption that * lower priorities are ignored during panic. */ if (other_cpu_in_panic()) return -EPERM; if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) return -EPERM; if (cur->unsafe) return -EBUSY; /* * The console should never be safe for a direct acquire * if an unsafe hostile takeover has ever happened. */ WARN_ON_ONCE(cur->unsafe_takeover); new.atom = cur->atom; new.prio = ctxt->prio; new.req_prio = NBCON_PRIO_NONE; new.unsafe = cur->unsafe_takeover; new.cpu = cpu; } while (!nbcon_state_try_cmpxchg(con, cur, &new)); return 0; } static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) { /* * The request context is well defined by the @req_prio because: * * - Only a context with a priority higher than the owner can become * a waiter. * - Only a context with a priority higher than the waiter can * directly take over the request. * - There are only three priorities. * - Only one CPU is allowed to request PANIC priority. * - Lower priorities are ignored during panic() until reboot. * * As a result, the following scenario is *not* possible: * * 1. This context is currently a waiter. * 2. Another context with a higher priority than this context * directly takes ownership. * 3. The higher priority context releases the ownership. * 4. Another lower priority context takes the ownership. * 5. 
Another context with the same priority as this context * creates a request and starts waiting. * * Event #1 implies this context is EMERGENCY. * Event #2 implies the new context is PANIC. * Event #3 occurs when panic() has flushed the console. * Events #4 and #5 are not possible due to the other_cpu_in_panic() * check in nbcon_context_try_acquire_direct(). */ return (cur->req_prio == expected_prio); } /** * nbcon_context_try_acquire_requested - Try to acquire after having * requested a handover * @ctxt: The context of the caller * @cur: The current console state * * This is a helper function for nbcon_context_try_acquire_handover(). * It is called when the console is in an unsafe state. The current * owner will release the console on exit from the unsafe region. * * Return: 0 on success and @cur is updated to the new console state. * Otherwise an error code on failure. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU * or this context is no longer the waiter. * * -EBUSY: The console is still locked. The caller should * continue waiting. * * Note: The caller must still remove the request when an error has occurred * except when this context is no longer the waiter. */ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; /* Note that the caller must still remove the request! */ if (other_cpu_in_panic()) return -EPERM; /* * Note that the waiter will also change if there was an unsafe * hostile takeover. */ if (!nbcon_waiter_matches(cur, ctxt->prio)) return -EPERM; /* If still locked, caller should continue waiting. */ if (cur->prio != NBCON_PRIO_NONE) return -EBUSY; /* * The previous owner should have never released ownership * in an unsafe region. */ WARN_ON_ONCE(cur->unsafe); new.atom = cur->atom; new.prio = ctxt->prio; new.req_prio = NBCON_PRIO_NONE; new.unsafe = cur->unsafe_takeover; new.cpu = cpu; if (!nbcon_state_try_cmpxchg(con, cur, &new)) { /* * The acquire could fail only when it has been taken * over by a higher priority context. */ WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); return -EPERM; } /* Handover success. This context now owns the console. */ return 0; } /** * nbcon_context_try_acquire_handover - Try to acquire via handover * @ctxt: The context of the caller * @cur: The current console state * * The function must be called only when the context has higher priority * than the current owner and the console is in an unsafe state. * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. * * The function sets "req_prio" field to make the current owner aware of * the request. Then it waits until the current owner releases the console, * or an even higher context takes over the request, or timeout expires. * * The current owner checks the "req_prio" field on exit from the unsafe * region and releases the console. It does not touch the "req_prio" field * so that the console stays reserved for the waiter. * * Return: 0 on success. Otherwise, an error code on failure. Also @cur * is updated to the latest state when failed to modify it. * * Errors: * * -EPERM: A panic is in progress and this is not the panic CPU. * Or a higher priority context has taken over the * console or the handover request. * * -EBUSY: The current owner is on the same CPU so that the hand * shake could not work. Or the current owner is not * willing to wait (zero timeout). 
Or the console does * not enter the safe state before timeout passed. The * caller might still use the unsafe hostile takeover * when allowed. * * -EAGAIN: @cur has changed when creating the handover request. * The caller should retry with direct acquire. */ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; int timeout; int request_err = -EBUSY; /* * Check that the handover is called when the direct acquire failed * with -EBUSY. */ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(!cur->unsafe); /* Handover is not possible on the same CPU. */ if (cur->cpu == cpu) return -EBUSY; /* * Console stays unsafe after an unsafe takeover until re-initialized. * Waiting is not going to help in this case. */ if (cur->unsafe_takeover) return -EBUSY; /* Is the caller willing to wait? */ if (ctxt->spinwait_max_us == 0) return -EBUSY; /* * Setup a request for the handover. The caller should try to acquire * the console directly when the current state has been modified. */ new.atom = cur->atom; new.req_prio = ctxt->prio; if (!nbcon_state_try_cmpxchg(con, cur, &new)) return -EAGAIN; cur->atom = new.atom; /* Wait until there is no owner and then acquire the console. */ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { /* On successful acquire, this request is cleared. */ request_err = nbcon_context_try_acquire_requested(ctxt, cur); if (!request_err) return 0; /* * If the acquire should be aborted, it must be ensured * that the request is removed before returning to caller. */ if (request_err == -EPERM) break; udelay(1); /* Re-read the state because some time has passed. */ nbcon_state_read(con, cur); } /* Timed out or aborted. Carefully remove handover request. */ do { /* * No need to remove request if there is a new waiter. This * can only happen if a higher priority context has taken over * the console or the handover request. */ if (!nbcon_waiter_matches(cur, ctxt->prio)) return -EPERM; /* Unset request for handover. */ new.atom = cur->atom; new.req_prio = NBCON_PRIO_NONE; if (nbcon_state_try_cmpxchg(con, cur, &new)) { /* * Request successfully unset. Report failure of * acquiring via handover. */ cur->atom = new.atom; return request_err; } /* * Unable to remove request. Try to acquire in case * the owner has released the lock. */ } while (nbcon_context_try_acquire_requested(ctxt, cur)); /* Lucky timing. The acquire succeeded while removing the request. */ return 0; } /** * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover * @ctxt: The context of the caller * @cur: The current console state * * Acquire the console even in the unsafe state. * * It can be permitted by setting the 'allow_unsafe_takeover' field only * by the final attempt to flush messages in panic(). * * Return: 0 on success. -EPERM when not allowed by the context. */ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state new; if (!ctxt->allow_unsafe_takeover) return -EPERM; /* Ensure caller is allowed to perform unsafe hostile takeovers. */ if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) return -EPERM; /* * Check that try_acquire_direct() and try_acquire_handover() returned * -EBUSY in the right situation. 
*/ WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); WARN_ON_ONCE(cur->unsafe != true); do { new.atom = cur->atom; new.cpu = cpu; new.prio = ctxt->prio; new.unsafe |= cur->unsafe_takeover; new.unsafe_takeover |= cur->unsafe; } while (!nbcon_state_try_cmpxchg(con, cur, &new)); return 0; } static struct printk_buffers panic_nbcon_pbufs; /** * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller * * Context: Under @ctxt->con->device_lock() or local_irq_save(). * Return: True if the console was acquired. False otherwise. * * If the caller allowed an unsafe hostile takeover, on success the * caller should check the current console state to see if it is * in an unsafe state. Otherwise, on success the caller may assume * the console is not in an unsafe state. */ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; int err; nbcon_state_read(con, &cur); try_again: err = nbcon_context_try_acquire_direct(ctxt, &cur); if (err != -EBUSY) goto out; err = nbcon_context_try_acquire_handover(ctxt, &cur); if (err == -EAGAIN) goto try_again; if (err != -EBUSY) goto out; err = nbcon_context_try_acquire_hostile(ctxt, &cur); out: if (err) return false; /* Acquire succeeded. */ /* Assign the appropriate buffer for this context. */ if (atomic_read(&panic_cpu) == cpu) ctxt->pbufs = &panic_nbcon_pbufs; else ctxt->pbufs = con->pbufs; /* Set the record sequence for this context to print. */ ctxt->seq = nbcon_seq_read(ctxt->console); return true; } static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, int expected_prio) { /* * A similar function, nbcon_waiter_matches(), only deals with * EMERGENCY and PANIC priorities. However, this function must also * deal with the NORMAL priority, which requires additional checks * and constraints. * * For the case where preemption and interrupts are disabled, it is * enough to also verify that the owning CPU has not changed. * * For the case where preemption or interrupts are enabled, an * external synchronization method *must* be used. In particular, * the driver-specific locking mechanism used in device_lock() * (including disabling migration) should be used. It prevents * scenarios such as: * * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and * is scheduled out. * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY * and releases it. * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X] * and is scheduled out. * 4. [Task A] gets running on [CPU X] and sees that the console is * still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus * [Task A] thinks it is the owner when it is not. */ if (cur->prio != expected_prio) return false; if (cur->cpu != expected_cpu) return false; return true; } /** * nbcon_context_release - Release the console * @ctxt: The nbcon context from nbcon_context_try_acquire() */ static void nbcon_context_release(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; struct nbcon_state new; nbcon_state_read(con, &cur); do { if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) break; new.atom = cur.atom; new.prio = NBCON_PRIO_NONE; /* * If @unsafe_takeover is set, it is kept set so that * the state remains permanently unsafe. 
*/ new.unsafe |= cur.unsafe_takeover; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); ctxt->pbufs = NULL; } /** * nbcon_context_can_proceed - Check whether ownership can proceed * @ctxt: The nbcon context from nbcon_context_try_acquire() * @cur: The current console state * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * Must be invoked when entering the unsafe state to make sure that it still * owns the lock. Also must be invoked when exiting the unsafe context * to eventually free the lock for a higher priority context which asked * for the friendly handover. * * It can be called inside an unsafe section when the console is just * temporary in safe state instead of exiting and entering the unsafe * state. * * Also it can be called in the safe context before doing an expensive * safe operation. It does not make sense to do the operation when * a higher priority context took the lock. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur) { unsigned int cpu = smp_processor_id(); /* Make sure this context still owns the console. */ if (!nbcon_owner_matches(cur, cpu, ctxt->prio)) return false; /* The console owner can proceed if there is no waiter. */ if (cur->req_prio == NBCON_PRIO_NONE) return true; /* * A console owner within an unsafe region is always allowed to * proceed, even if there are waiters. It can perform a handover * when exiting the unsafe region. Otherwise the waiter will * need to perform an unsafe hostile takeover. */ if (cur->unsafe) return true; /* Waiters always have higher priorities than owners. */ WARN_ON_ONCE(cur->req_prio <= cur->prio); /* * Having a safe point for take over and eventually a few * duplicated characters or a full line is way better than a * hostile takeover. Post processing can take care of the garbage. * Release and hand over. */ nbcon_context_release(ctxt); /* * It is not clear whether the waiter really took over ownership. The * outermost callsite must make the final decision whether console * ownership is needed for it to proceed. If yes, it must reacquire * ownership (possibly hostile) before carefully proceeding. * * The calling context no longer owns the console so go back all the * way instead of trying to implement reacquire heuristics in tons of * places. */ return false; } /** * nbcon_can_proceed - Check whether ownership can proceed * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * It is used in nbcon_enter_unsafe() to make sure that it still owns the * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock * for a higher priority context which asked for the friendly handover. * * It can be called inside an unsafe section when the console is just * temporary in safe state instead of exiting and entering the unsafe state. * * Also it can be called in the safe context before doing an expensive safe * operation. It does not make sense to do the operation when a higher * priority context took the lock. 
* * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; struct nbcon_state cur; nbcon_state_read(con, &cur); return nbcon_context_can_proceed(ctxt, &cur); } EXPORT_SYMBOL_GPL(nbcon_can_proceed); #define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) #define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) /** * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state * @ctxt: The nbcon context from nbcon_context_try_acquire() * @unsafe: The new value for the unsafe bit * * Return: True if the unsafe state was updated and this context still * owns the console. Otherwise false if ownership was handed * over or taken. * * This function allows console owners to modify the unsafe status of the * console. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. * * Internal helper to avoid duplicated code. */ static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) { struct console *con = ctxt->console; struct nbcon_state cur; struct nbcon_state new; nbcon_state_read(con, &cur); do { /* * The unsafe bit must not be cleared if an * unsafe hostile takeover has occurred. */ if (!unsafe && cur.unsafe_takeover) goto out; if (!nbcon_context_can_proceed(ctxt, &cur)) return false; new.atom = cur.atom; new.unsafe = unsafe; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); cur.atom = new.atom; out: return nbcon_context_can_proceed(ctxt, &cur); } static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt, char *buf, unsigned int len) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; struct nbcon_state cur; wctxt->outbuf = buf; wctxt->len = len; nbcon_state_read(con, &cur); wctxt->unsafe_takeover = cur.unsafe_takeover; } /** * nbcon_enter_unsafe - Enter an unsafe region in the driver * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); bool is_owner; is_owner = nbcon_context_enter_unsafe(ctxt); if (!is_owner) nbcon_write_context_set_buf(wctxt, NULL, 0); return is_owner; } EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); /** * nbcon_exit_unsafe - Exit an unsafe region in the driver * @wctxt: The write context that was handed to the write function * * Return: True if this context still owns the console. False if * ownership was handed over or taken. 
* * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. */ bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); bool ret; ret = nbcon_context_exit_unsafe(ctxt); if (!ret) nbcon_write_context_set_buf(wctxt, NULL, 0); return ret; } EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); /** * nbcon_reacquire_nobuf - Reacquire a console after losing ownership * while printing * @wctxt: The write context that was handed to the write callback * * Since ownership can be lost at any time due to handover or takeover, a * printing context _must_ be prepared to back out immediately and * carefully. However, there are scenarios where the printing context must * reacquire ownership in order to finalize or revert hardware changes. * * This function allows a printing context to reacquire ownership using the * same priority as its previous ownership. * * Note that after a successful reacquire the printing context will have no * output buffer because that has been lost. This function cannot be used to * resume printing. */ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); while (!nbcon_context_try_acquire(ctxt)) cpu_relax(); nbcon_write_context_set_buf(wctxt, NULL, 0); } EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function * @use_atomic: True if the write_atomic() callback is to be used * * Return: True if this context still owns the console. False if * ownership was handed over or taken. * * When this function returns false then the calling context no longer owns * the console and is no longer allowed to go forward. In this case it must * back out immediately and carefully. The buffer content is also no longer * trusted since it no longer belongs to the calling context. If the caller * wants to do more it must reacquire the console first. * * When true is returned, @wctxt->ctxt.backlog indicates whether there are * still records pending in the ringbuffer, */ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; struct printk_message pmsg = { .pbufs = ctxt->pbufs, }; unsigned long con_dropped; struct nbcon_state cur; unsigned long dropped; unsigned long ulseq; /* * This function should never be called for consoles that have not * implemented the necessary callback for writing: i.e. legacy * consoles and, when atomic, nbcon consoles with no write_atomic(). * Handle it as if ownership was lost and try to continue. * * Note that for nbcon consoles the write_thread() callback is * mandatory and was already checked in nbcon_alloc(). */ if (WARN_ON_ONCE((use_atomic && !con->write_atomic) || !(console_srcu_read_flags(con) & CON_NBCON))) { nbcon_context_release(ctxt); return false; } /* * The printk buffers are filled within an unsafe section. This * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from * clobbering each other. 
*/ if (!nbcon_context_enter_unsafe(ctxt)) return false; ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); if (!ctxt->backlog) return nbcon_context_exit_unsafe(ctxt); /* * @con->dropped is not protected in case of an unsafe hostile * takeover. In that situation the update can be racy so * annotate it accordingly. */ con_dropped = data_race(READ_ONCE(con->dropped)); dropped = con_dropped + pmsg.dropped; if (dropped && !is_extended) console_prepend_dropped(&pmsg, dropped); /* * If the previous owner was assigned the same record, this context * has taken over ownership and is replaying the record. Prepend a * message to let the user know the record is replayed. */ ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq)); if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) { console_prepend_replay(&pmsg); } else { /* * Ensure this context is still the owner before trying to * update @nbcon_prev_seq. Otherwise the value in @ulseq may * not be from the previous owner and instead be some later * value from the context that took over ownership. */ nbcon_state_read(con, &cur); if (!nbcon_context_can_proceed(ctxt, &cur)) return false; atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq, __u64seq_to_ulseq(pmsg.seq)); } if (!nbcon_context_exit_unsafe(ctxt)) return false; /* For skipped records just update seq/dropped in @con. */ if (pmsg.outbuf_len == 0) goto update_con; /* Initialize the write context for driver callbacks. */ nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); if (use_atomic) con->write_atomic(con, wctxt); else con->write_thread(con, wctxt); if (!wctxt->outbuf) { /* * Ownership was lost and reacquired by the driver. Handle it * as if ownership was lost. */ nbcon_context_release(ctxt); return false; } /* * Ownership may have been lost but _not_ reacquired by the driver. * This case is detected and handled when entering unsafe to update * dropped/seq values. */ /* * Since any dropped message was successfully output, reset the * dropped count for the console. */ dropped = 0; update_con: /* * The dropped count and the sequence number are updated within an * unsafe section. This limits update races to the panic context and * allows the panic context to win. */ if (!nbcon_context_enter_unsafe(ctxt)) return false; if (dropped != con_dropped) { /* Counterpart to the READ_ONCE() above. */ WRITE_ONCE(con->dropped, dropped); } nbcon_seq_try_update(ctxt, pmsg.seq + 1); return nbcon_context_exit_unsafe(ctxt); } /* * nbcon_emit_one - Print one record for an nbcon console using the * specified callback * @wctxt: An initialized write context struct to use for this context * @use_atomic: True if the write_atomic() callback is to be used * * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. * * False, when there is no pending record, or when the console * context cannot be acquired, or the ownership has been lost. * The caller should give up. Either the job is done, cannot be * done, or will be handled by the owning context. * * This is an internal helper to handle the locking of the console before * calling nbcon_emit_next_record(). 
*/ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); struct console *con = ctxt->console; unsigned long flags; bool ret = false; if (!use_atomic) { con->device_lock(con, &flags); /* * Ensure this stays on the CPU to make handover and * takeover possible. */ cant_migrate(); } if (!nbcon_context_try_acquire(ctxt)) goto out; /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no * longer valid. * * The higher priority printing context takes over responsibility * to print the pending records. */ if (!nbcon_emit_next_record(wctxt, use_atomic)) goto out; nbcon_context_release(ctxt); ret = ctxt->backlog; out: if (!use_atomic) con->device_unlock(con, flags); return ret; } /** * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup * @con: Console to operate on * @ctxt: The nbcon context from nbcon_context_try_acquire() * * Return: True if the thread should shutdown or if the console is * allowed to print and a record is available. False otherwise. * * After the thread wakes up, it must first check if it should shutdown before * attempting any printing. */ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt) { bool ret = false; short flags; int cookie; if (kthread_should_stop()) return true; cookie = console_srcu_read_lock(); flags = console_srcu_read_flags(con); if (console_is_usable(con, flags, false)) { /* Bring the sequence in @ctxt up to date */ ctxt->seq = nbcon_seq_read(con); ret = prb_read_valid(prb, ctxt->seq, NULL); } console_srcu_read_unlock(cookie); return ret; } /** * nbcon_kthread_func - The printer thread function * @__console: Console to operate on * * Return: 0 */ static int nbcon_kthread_func(void *__console) { struct console *con = __console; struct nbcon_write_context wctxt = { .ctxt.console = con, .ctxt.prio = NBCON_PRIO_NORMAL, }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); short con_flags; bool backlog; int cookie; wait_for_event: /* * Guarantee this task is visible on the rcuwait before * checking the wake condition. * * The full memory barrier within set_current_state() of * ___rcuwait_wait_event() pairs with the full memory * barrier within rcuwait_has_sleeper(). * * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A. */ rcuwait_wait_event(&con->rcuwait, nbcon_kthread_should_wakeup(con, ctxt), TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */ do { if (kthread_should_stop()) return 0; backlog = false; /* * Keep the srcu read lock around the entire operation so that * synchronize_srcu() can guarantee that the kthread stopped * or suspended printing. */ cookie = console_srcu_read_lock(); con_flags = console_srcu_read_flags(con); if (console_is_usable(con, con_flags, false)) backlog = nbcon_emit_one(&wctxt, false); console_srcu_read_unlock(cookie); cond_resched(); } while (backlog); goto wait_for_event; } /** * nbcon_irq_work - irq work to wake console printer thread * @irq_work: The irq work to operate on */ static void nbcon_irq_work(struct irq_work *irq_work) { struct console *con = container_of(irq_work, struct console, irq_work); nbcon_kthread_wake(con); } static inline bool rcuwait_has_sleeper(struct rcuwait *w) { /* * Guarantee any new records can be seen by tasks preparing to wait * before this context checks if the rcuwait is empty. 
* * This full memory barrier pairs with the full memory barrier within * set_current_state() of ___rcuwait_wait_event(), which is called * after prepare_to_rcuwait() adds the waiter but before it has * checked the wait condition. * * This pairs with nbcon_kthread_func:A. */ smp_mb(); /* LMM(rcuwait_has_sleeper:A) */ return rcuwait_active(w); } /** * nbcon_kthreads_wake - Wake up printing threads using irq_work */ void nbcon_kthreads_wake(void) { struct console *con; int cookie; if (!printk_kthreads_running) return; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { if (!(console_srcu_read_flags(con) & CON_NBCON)) continue; /* * Only schedule irq_work if the printing thread is * actively waiting. If not waiting, the thread will * notice by itself that it has work to do. */ if (rcuwait_has_sleeper(&con->rcuwait)) irq_work_queue(&con->irq_work); } console_srcu_read_unlock(cookie); } /* * nbcon_kthread_stop - Stop a console printer thread * @con: Console to operate on */ void nbcon_kthread_stop(struct console *con) { lockdep_assert_console_list_lock_held(); if (!con->kthread) return; kthread_stop(con->kthread); con->kthread = NULL; } /** * nbcon_kthread_create - Create a console printer thread * @con: Console to operate on * * Return: True if the kthread was started or already exists. * Otherwise false and @con must not be registered. * * This function is called when it will be expected that nbcon consoles are * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL * will be no longer flushed by the legacy loop. This is why failure must * be fatal for console registration. * * If @con was already registered and this function fails, @con must be * unregistered before the global state variable @printk_kthreads_running * can be set. */ bool nbcon_kthread_create(struct console *con) { struct task_struct *kt; lockdep_assert_console_list_lock_held(); if (con->kthread) return true; kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index); if (WARN_ON(IS_ERR(kt))) { con_printk(KERN_ERR, con, "failed to start printing thread\n"); return false; } con->kthread = kt; /* * It is important that console printing threads are scheduled * shortly after a printk call and with generous runtime budgets. */ sched_set_normal(con->kthread, -20); return true; } /* Track the nbcon emergency nesting per CPU. */ static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting); static unsigned int early_nbcon_pcpu_emergency_nesting __initdata; /** * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer * * Context: For reading, any context. For writing, any context which could * not be migrated to another CPU. * Return: Either a pointer to the per CPU emergency nesting counter of * the current CPU or to the init data during early boot. * * The function is safe for reading per-CPU variables in any context because * preemption is disabled if the current CPU is in the emergency state. See * also nbcon_cpu_emergency_enter(). */ static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void) { /* * The value of __printk_percpu_data_ready gets set in normal * context and before SMP initialization. As a result it could * never change while inside an nbcon emergency section. */ if (!printk_percpu_data_ready()) return &early_nbcon_pcpu_emergency_nesting; return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting); } /** * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon * printing on the current CPU * * Context: Any context. 
* Return: The nbcon_prio to use for acquiring an nbcon console in this * context for printing. * * The function is safe for reading per-CPU data in any context because * preemption is disabled if the current CPU is in the emergency or panic * state. */ enum nbcon_prio nbcon_get_default_prio(void) { unsigned int *cpu_emergency_nesting; if (this_cpu_in_panic()) return NBCON_PRIO_PANIC; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); if (*cpu_emergency_nesting) return NBCON_PRIO_EMERGENCY; return NBCON_PRIO_NORMAL; } /** * nbcon_legacy_emit_next_record - Print one record for an nbcon console * in legacy contexts * @con: The console to print on * @handover: Will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding * both the console_lock and the SRCU read lock. Otherwise it * is set to false. * @cookie: The cookie from the SRCU read lock. * @use_atomic: Set true when called in an atomic or unknown context. * It affects which nbcon callback will be used: write_atomic() * or write_thread(). * * When false, the write_thread() callback is used and would be * called in a preemtible context unless disabled by the * device_lock. The legacy handover is not allowed in this mode. * * Context: Any context except NMI. * Return: True, when a record has been printed and there are still * pending records. The caller might want to continue flushing. * * False, when there is no pending record, or when the console * context cannot be acquired, or the ownership has been lost. * The caller should give up. Either the job is done, cannot be * done, or will be handled by the owning context. * * This function is meant to be called by console_flush_all() to print records * on nbcon consoles from legacy context (printing via console unlocking). * Essentially it is the nbcon version of console_emit_next_record(). */ bool nbcon_legacy_emit_next_record(struct console *con, bool *handover, int cookie, bool use_atomic) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); unsigned long flags; bool progress; ctxt->console = con; ctxt->prio = nbcon_get_default_prio(); if (use_atomic) { /* * In an atomic or unknown context, use the same procedure as * in console_emit_next_record(). It allows to handover. */ printk_safe_enter_irqsave(flags); console_lock_spinning_enable(); stop_critical_timings(); } progress = nbcon_emit_one(&wctxt, use_atomic); if (use_atomic) { start_critical_timings(); *handover = console_lock_spinning_disable_and_check(cookie); printk_safe_exit_irqrestore(flags); } else { /* Non-atomic does not perform legacy spinning handovers. */ *handover = false; } return progress; } /** * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * Return: 0 if @con was flushed up to @stop_seq Otherwise, error code on * failure. * * Errors: * * -EPERM: Unable to acquire console ownership. * * -EAGAIN: Another context took over ownership while printing. * * -ENOENT: A record before @stop_seq is not available. * * If flushing up to @stop_seq was not successful, it only makes sense for the * caller to try again when -EAGAIN was returned. When -EPERM is returned, * this context is not allowed to acquire the console. When -ENOENT is * returned, it cannot be expected that the unfinalized record will become * available. 
*/ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, bool allow_unsafe_takeover) { struct nbcon_write_context wctxt = { }; struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); int err = 0; ctxt->console = con; ctxt->spinwait_max_us = 2000; ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; if (!nbcon_context_try_acquire(ctxt)) return -EPERM; while (nbcon_seq_read(con) < stop_seq) { /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no * longer valid. */ if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; if (!ctxt->backlog) { /* Are there reserved but not yet finalized records? */ if (nbcon_seq_read(con) < stop_seq) err = -ENOENT; break; } } nbcon_context_release(ctxt); return err; } /** * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its * write_atomic() callback * @con: The nbcon console to flush * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers * * This will stop flushing before @stop_seq if another context has ownership. * That context is then responsible for the flushing. Likewise, if new records * are added while this context was flushing and there is no other context * to handle the printing, this context must also flush those records. */ static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, bool allow_unsafe_takeover) { struct console_flush_type ft; unsigned long flags; int err; again: /* * Atomic flushing does not use console driver synchronization (i.e. * it does not hold the port lock for uart consoles). Therefore IRQs * must be disabled to avoid being interrupted and then calling into * a driver that will deadlock trying to acquire console ownership. */ local_irq_save(flags); err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); local_irq_restore(flags); /* * If there was a new owner (-EPERM, -EAGAIN), that context is * responsible for completing. * * Do not wait for records not yet finalized (-ENOENT) to avoid a * possible deadlock. They will either get flushed by the writer or * eventually skipped on panic CPU. */ if (err) return; /* * If flushing was successful but more records are available, this * context must flush those remaining records if the printer thread * is not available do it. */ printk_get_console_flush_type(&ft); if (!ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { stop_seq = prb_next_reserve_seq(prb); goto again; } } /** * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * @stop_seq: Flush up until this record * @allow_unsafe_takeover: True, to allow unsafe hostile takeovers */ static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover) { struct console *con; int cookie; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { short flags = console_srcu_read_flags(con); if (!(flags & CON_NBCON)) continue; if (!console_is_usable(con, flags, true)) continue; if (nbcon_seq_read(con) >= stop_seq) continue; nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover); } console_srcu_read_unlock(cookie); } /** * nbcon_atomic_flush_pending - Flush all nbcon consoles using their * write_atomic() callback * * Flush the backlog up through the currently newest record. Any new * records added while flushing will not be flushed if there is another * context available to handle the flushing. 
This is to avoid one CPU * printing unbounded because other CPUs continue to add records. */ void nbcon_atomic_flush_pending(void) { __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false); } /** * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their * write_atomic() callback and allowing unsafe hostile takeovers * * Flush the backlog up through the currently newest record. Unsafe hostile * takeovers will be performed, if necessary. */ void nbcon_atomic_flush_unsafe(void) { __nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true); } /** * nbcon_cpu_emergency_enter - Enter an emergency section where printk() * messages for that CPU are flushed directly * * Context: Any context. Disables preemption. * * When within an emergency section, printk() calls will attempt to flush any * pending messages in the ringbuffer. */ void nbcon_cpu_emergency_enter(void) { unsigned int *cpu_emergency_nesting; preempt_disable(); cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); (*cpu_emergency_nesting)++; } /** * nbcon_cpu_emergency_exit - Exit an emergency section * * Context: Within an emergency section. Enables preemption. */ void nbcon_cpu_emergency_exit(void) { unsigned int *cpu_emergency_nesting; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) (*cpu_emergency_nesting)--; preempt_enable(); } /** * nbcon_alloc - Allocate and init the nbcon console specific data * @con: Console to initialize * * Return: True if the console was fully allocated and initialized. * Otherwise @con must not be registered. * * When allocation and init was successful, the console must be properly * freed using nbcon_free() once it is no longer needed. */ bool nbcon_alloc(struct console *con) { struct nbcon_state state = { }; /* The write_thread() callback is mandatory. */ if (WARN_ON(!con->write_thread)) return false; rcuwait_init(&con->rcuwait); init_irq_work(&con->irq_work, nbcon_irq_work); atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL); nbcon_state_set(con, &state); /* * Initialize @nbcon_seq to the highest possible sequence number so * that practically speaking it will have nothing to print until a * desired initial sequence number has been set via nbcon_seq_force(). */ atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb)); if (con->flags & CON_BOOT) { /* * Boot console printing is synchronized with legacy console * printing, so boot consoles can share the same global printk * buffers. */ con->pbufs = &printk_shared_pbufs; } else { con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); if (!con->pbufs) { con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); return false; } if (printk_kthreads_running) { if (!nbcon_kthread_create(con)) { kfree(con->pbufs); con->pbufs = NULL; return false; } } } return true; } /** * nbcon_free - Free and cleanup the nbcon console specific data * @con: Console to free/cleanup nbcon data */ void nbcon_free(struct console *con) { struct nbcon_state state = { }; if (printk_kthreads_running) nbcon_kthread_stop(con); nbcon_state_set(con, &state); /* Boot consoles share global printk buffers. */ if (!(con->flags & CON_BOOT)) kfree(con->pbufs); con->pbufs = NULL; } /** * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe * section * @con: The nbcon console to acquire * * Context: Under the locking mechanism implemented in * @con->device_lock() including disabling migration. * Return: True if the console was acquired. False otherwise. 
* * Console drivers will usually use their own internal synchronization * mechanism to synchronize between console printing and non-printing * activities (such as setting baud rates). However, nbcon console drivers * supporting atomic consoles may also want to mark unsafe sections when * performing non-printing activities in order to synchronize against their * write_atomic() callback. * * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL * and marks it unsafe for handover/takeover. */ bool nbcon_device_try_acquire(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); cant_migrate(); memset(ctxt, 0, sizeof(*ctxt)); ctxt->console = con; ctxt->prio = NBCON_PRIO_NORMAL; if (!nbcon_context_try_acquire(ctxt)) return false; if (!nbcon_context_enter_unsafe(ctxt)) return false; return true; } EXPORT_SYMBOL_GPL(nbcon_device_try_acquire); /** * nbcon_device_release - Exit unsafe section and release the nbcon console * @con: The nbcon console acquired in nbcon_device_try_acquire() */ void nbcon_device_release(struct console *con) { struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt); struct console_flush_type ft; int cookie; if (!nbcon_context_exit_unsafe(ctxt)) return; nbcon_context_release(ctxt); /* * This context must flush any new records added while the console * was locked if the printer thread is not available to do it. The * console_srcu_read_lock must be taken to ensure the console is * usable throughout flushing. */ cookie = console_srcu_read_lock(); printk_get_console_flush_type(&ft); if (console_is_usable(con, console_srcu_read_flags(con), true) && !ft.nbcon_offload && prb_read_valid(prb, nbcon_seq_read(con), NULL)) { /* * If nbcon_atomic flushing is not available, fall back to * using the legacy loop. */ if (ft.nbcon_atomic) { __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false); } else if (ft.legacy_direct) { if (console_trylock()) console_unlock(); } else if (ft.legacy_offload) { printk_trigger_flush(); } } console_srcu_read_unlock(cookie); } EXPORT_SYMBOL_GPL(nbcon_device_release);
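/*
 * Editor's note (illustration, not part of the kernel sources): the
 * priority selection above reduces to a small amount of per-CPU state,
 * a panic flag and an emergency nesting counter, with panic taking
 * precedence over emergency and emergency over normal. The standalone
 * userspace sketch below models only that decision; the model_* names
 * and the single-CPU simplification are assumptions made for
 * illustration.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum model_prio { MODEL_PRIO_NORMAL, MODEL_PRIO_EMERGENCY, MODEL_PRIO_PANIC };

static bool model_in_panic;
static unsigned int model_emergency_nesting;

static void model_emergency_enter(void)
{
	model_emergency_nesting++;
}

static void model_emergency_exit(void)
{
	assert(model_emergency_nesting > 0);
	model_emergency_nesting--;
}

/* Panic beats emergency, emergency beats normal. */
static enum model_prio model_default_prio(void)
{
	if (model_in_panic)
		return MODEL_PRIO_PANIC;
	if (model_emergency_nesting)
		return MODEL_PRIO_EMERGENCY;
	return MODEL_PRIO_NORMAL;
}

int main(void)
{
	printf("%d\n", model_default_prio());	/* 0: normal */
	model_emergency_enter();
	model_emergency_enter();		/* nesting is allowed */
	printf("%d\n", model_default_prio());	/* 1: emergency */
	model_emergency_exit();
	printf("%d\n", model_default_prio());	/* still 1 until nesting drops to 0 */
	model_emergency_exit();
	model_in_panic = true;			/* panic overrides everything */
	printf("%d\n", model_default_prio());	/* 2: panic */
	return 0;
}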
1 1 4 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 // SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * * fragment.c */ /* * This file implements code to handle compressed fragments (tail-end packed * datablocks). * * Regular files contain a fragment index which is mapped to a fragment * location on disk and compressed size using a fragment lookup table. * Like everything in Squashfs this fragment lookup table is itself stored * compressed into metadata blocks. A second index table is used to locate * these. This second index table for speed of access (and because it * is small) is read at mount time and cached in memory. */ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" /* * Look-up fragment using the fragment index table. Return the on disk * location of the fragment and its compressed size */ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; int block, offset, size; struct squashfs_fragment_entry fragment_entry; u64 start_block; if (fragment >= msblk->fragments) return -EIO; block = SQUASHFS_FRAGMENT_INDEX(fragment); offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); if (size < 0) return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); return squashfs_block_size(fragment_entry.size); } /* * Read the uncompressed fragment lookup table indexes off disk into memory */ __le64 *squashfs_read_fragment_index_table(struct super_block *sb, u64 fragment_table_start, u64 next_table, unsigned int fragments) { unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments); __le64 *table; /* * Sanity check, length bytes should not extend into the next table - * this check also traps instances where fragment_table_start is * incorrectly larger than the next table start */ if (fragment_table_start + length > next_table) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, fragment_table_start, length); /* * table[0] points to the first fragment table metadata block, this * should be less than fragment_table_start */ if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) { kfree(table); return ERR_PTR(-EINVAL); } return table; }
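/*
 * Editorial sketch (not from the squashfs sources): squashfs_frag_lookup()
 * above splits a fragment number into a metadata-block index and a byte
 * offset before reading the on-disk entry. The model below re-derives that
 * arithmetic in userspace, assuming 8 KiB metadata blocks and 16-byte
 * fragment entries (512 entries per block); the MODEL_* and model_* names
 * are invented for this example.
 */
#include <stdio.h>

#define MODEL_METADATA_SIZE	8192u
#define MODEL_FRAG_ENTRY_SIZE	16u
#define MODEL_FRAGS_PER_BLOCK	(MODEL_METADATA_SIZE / MODEL_FRAG_ENTRY_SIZE)

/* Which entry of the top-level fragment index table to consult. */
static unsigned int model_fragment_index(unsigned int fragment)
{
	return fragment / MODEL_FRAGS_PER_BLOCK;
}

/* Byte offset of the entry inside that (uncompressed) metadata block. */
static unsigned int model_fragment_offset(unsigned int fragment)
{
	return (fragment % MODEL_FRAGS_PER_BLOCK) * MODEL_FRAG_ENTRY_SIZE;
}

int main(void)
{
	unsigned int frags[] = { 0, 511, 512, 1000 };

	for (unsigned int i = 0; i < sizeof(frags) / sizeof(frags[0]); i++)
		printf("fragment %4u -> index block %u, offset %u\n",
		       frags[i], model_fragment_index(frags[i]),
		       model_fragment_offset(frags[i]));
	return 0;
}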
787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/nospec.h> #include <linux/hugetlb.h> #include <linux/compat.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "openclose.h" #include "rsrc.h" #include "memmap.h" #include "register.h" struct io_rsrc_update { struct file *file; u64 arg; u32 nr_args; u32 offset; }; static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, struct page **last_hpage); /* only define max */ #define IORING_MAX_FIXED_FILES (1U << 20) #define IORING_MAX_REG_BUFFERS (1U << 14) int __io_account_mem(struct user_struct *user, unsigned long nr_pages) { unsigned long page_limit, cur_pages, new_pages; if (!nr_pages) return 0; /* Don't allow more pages than we can safely lock */ page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; cur_pages = atomic_long_read(&user->locked_vm); do { new_pages = cur_pages + nr_pages; if (new_pages > page_limit) return -ENOMEM; } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); return 0; } static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) { if (ctx->user) __io_unaccount_mem(ctx->user, nr_pages); if (ctx->mm_account) atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); } static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) { int ret; if (ctx->user) { ret = __io_account_mem(ctx->user, nr_pages); if (ret) return ret; } if (ctx->mm_account) atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); return 0; } static int io_buffer_validate(struct iovec *iov) { unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); /* * Don't impose further limits on the size and buffer * constraints here, we'll -EINVAL later when IO is * submitted if they are wrong. */ if (!iov->iov_base) return iov->iov_len ? 
-EFAULT : 0; if (!iov->iov_len) return -EFAULT; /* arbitrary limit, but we need something */ if (iov->iov_len > SZ_1G) return -EFAULT; if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) return -EOVERFLOW; return 0; } static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { unsigned int i; if (node->buf) { struct io_mapped_ubuf *imu = node->buf; if (!refcount_dec_and_test(&imu->refs)) return; for (i = 0; i < imu->nr_bvecs; i++) unpin_user_page(imu->bvec[i].bv_page); if (imu->acct_pages) io_unaccount_mem(ctx, imu->acct_pages); kvfree(imu); } } struct io_rsrc_node *io_rsrc_node_alloc(int type) { struct io_rsrc_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); if (node) { node->type = type; node->refs = 1; } return node; } __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data) { if (!data->nr) return; while (data->nr--) { if (data->nodes[data->nr]) io_put_rsrc_node(ctx, data->nodes[data->nr]); } kvfree(data->nodes); data->nodes = NULL; data->nr = 0; } __cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr) { data->nodes = kvmalloc_array(nr, sizeof(struct io_rsrc_node *), GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (data->nodes) { data->nr = nr; return 0; } return -ENOMEM; } static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) { u64 __user *tags = u64_to_user_ptr(up->tags); __s32 __user *fds = u64_to_user_ptr(up->data); int fd, i, err = 0; unsigned int done; if (!ctx->file_table.data.nr) return -ENXIO; if (up->offset + nr_args > ctx->file_table.data.nr) return -EINVAL; for (done = 0; done < nr_args; done++) { u64 tag = 0; if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || copy_from_user(&fd, &fds[done], sizeof(fd))) { err = -EFAULT; break; } if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { err = -EINVAL; break; } if (fd == IORING_REGISTER_FILES_SKIP) continue; i = up->offset + done; if (io_reset_rsrc_node(ctx, &ctx->file_table.data, i)) io_file_bitmap_clear(&ctx->file_table, i); if (fd != -1) { struct file *file = fget(fd); struct io_rsrc_node *node; if (!file) { err = -EBADF; break; } /* * Don't allow io_uring instances to be registered. */ if (io_is_uring_fops(file)) { fput(file); err = -EBADF; break; } node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { err = -ENOMEM; fput(file); break; } ctx->file_table.data.nodes[i] = node; if (tag) node->tag = tag; io_fixed_file_set(node, file); io_file_bitmap_set(&ctx->file_table, i); } } return done ? 
done : err; } static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned int nr_args) { u64 __user *tags = u64_to_user_ptr(up->tags); struct iovec fast_iov, *iov; struct page *last_hpage = NULL; struct iovec __user *uvec; u64 user_data = up->data; __u32 done; int i, err; if (!ctx->buf_table.nr) return -ENXIO; if (up->offset + nr_args > ctx->buf_table.nr) return -EINVAL; for (done = 0; done < nr_args; done++) { struct io_rsrc_node *node; u64 tag = 0; uvec = u64_to_user_ptr(user_data); iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); if (IS_ERR(iov)) { err = PTR_ERR(iov); break; } if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { err = -EFAULT; break; } err = io_buffer_validate(iov); if (err) break; node = io_sqe_buffer_register(ctx, iov, &last_hpage); if (IS_ERR(node)) { err = PTR_ERR(node); break; } if (tag) { if (!node) { err = -EINVAL; break; } node->tag = tag; } i = array_index_nospec(up->offset + done, ctx->buf_table.nr); io_reset_rsrc_node(ctx, &ctx->buf_table, i); ctx->buf_table.nodes[i] = node; if (ctx->compat) user_data += sizeof(struct compat_iovec); else user_data += sizeof(struct iovec); } return done ? done : err; } static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, struct io_uring_rsrc_update2 *up, unsigned nr_args) { __u32 tmp; lockdep_assert_held(&ctx->uring_lock); if (check_add_overflow(up->offset, nr_args, &tmp)) return -EOVERFLOW; switch (type) { case IORING_RSRC_FILE: return __io_sqe_files_update(ctx, up, nr_args); case IORING_RSRC_BUFFER: return __io_sqe_buffers_update(ctx, up, nr_args); } return -EINVAL; } int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { struct io_uring_rsrc_update2 up; if (!nr_args) return -EINVAL; memset(&up, 0, sizeof(up)); if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) return -EFAULT; if (up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); } int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, unsigned size, unsigned type) { struct io_uring_rsrc_update2 up; if (size != sizeof(up)) return -EINVAL; if (copy_from_user(&up, arg, sizeof(up))) return -EFAULT; if (!up.nr || up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, type, &up, up.nr); } __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, unsigned int size, unsigned int type) { struct io_uring_rsrc_register rr; /* keep it extendible */ if (size != sizeof(rr)) return -EINVAL; memset(&rr, 0, sizeof(rr)); if (copy_from_user(&rr, arg, size)) return -EFAULT; if (!rr.nr || rr.resv2) return -EINVAL; if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) return -EINVAL; switch (type) { case IORING_RSRC_FILE: if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) break; return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), rr.nr, u64_to_user_ptr(rr.tags)); case IORING_RSRC_BUFFER: if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) break; return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), rr.nr, u64_to_user_ptr(rr.tags)); } return -EINVAL; } int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; if (sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; up->offset = READ_ONCE(sqe->off); up->nr_args = READ_ONCE(sqe->len); if (!up->nr_args) return 
-EINVAL; up->arg = READ_ONCE(sqe->addr); return 0; } static int io_files_update_with_index_alloc(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); __s32 __user *fds = u64_to_user_ptr(up->arg); unsigned int done; struct file *file; int ret, fd; if (!req->ctx->file_table.data.nr) return -ENXIO; for (done = 0; done < up->nr_args; done++) { if (copy_from_user(&fd, &fds[done], sizeof(fd))) { ret = -EFAULT; break; } file = fget(fd); if (!file) { ret = -EBADF; break; } ret = io_fixed_fd_install(req, issue_flags, file, IORING_FILE_INDEX_ALLOC); if (ret < 0) break; if (copy_to_user(&fds[done], &ret, sizeof(ret))) { __io_close_fixed(req->ctx, issue_flags, ret); ret = -EFAULT; break; } } if (done) return done; return ret; } int io_files_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); struct io_ring_ctx *ctx = req->ctx; struct io_uring_rsrc_update2 up2; int ret; up2.offset = up->offset; up2.data = up->arg; up2.nr = 0; up2.tags = 0; up2.resv = 0; up2.resv2 = 0; if (up->offset == IORING_FILE_INDEX_ALLOC) { ret = io_files_update_with_index_alloc(req, issue_flags); } else { io_ring_submit_lock(ctx, issue_flags); ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up2, up->nr_args); io_ring_submit_unlock(ctx, issue_flags); } if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; } void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { if (node->tag) io_post_aux_cqe(ctx, node->tag, 0, 0); switch (node->type) { case IORING_RSRC_FILE: if (io_slot_file(node)) fput(io_slot_file(node)); break; case IORING_RSRC_BUFFER: if (node->buf) io_buffer_unmap(ctx, node); break; default: WARN_ON_ONCE(1); break; } kfree(node); } int io_sqe_files_unregister(struct io_ring_ctx *ctx) { if (!ctx->file_table.data.nr) return -ENXIO; io_free_file_tables(ctx, &ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); return 0; } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags) { __s32 __user *fds = (__s32 __user *) arg; struct file *file; int fd, ret; unsigned i; if (ctx->file_table.data.nr) return -EBUSY; if (!nr_args) return -EINVAL; if (nr_args > IORING_MAX_FIXED_FILES) return -EMFILE; if (nr_args > rlimit(RLIMIT_NOFILE)) return -EMFILE; if (!io_alloc_file_tables(ctx, &ctx->file_table, nr_args)) return -ENOMEM; for (i = 0; i < nr_args; i++) { struct io_rsrc_node *node; u64 tag = 0; ret = -EFAULT; if (tags && copy_from_user(&tag, &tags[i], sizeof(tag))) goto fail; if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) goto fail; /* allow sparse sets */ if (!fds || fd == -1) { ret = -EINVAL; if (tag) goto fail; continue; } file = fget(fd); ret = -EBADF; if (unlikely(!file)) goto fail; /* * Don't allow io_uring instances to be registered. 
*/ if (io_is_uring_fops(file)) { fput(file); goto fail; } ret = -ENOMEM; node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { fput(file); goto fail; } if (tag) node->tag = tag; ctx->file_table.data.nodes[i] = node; io_fixed_file_set(node, file); io_file_bitmap_set(&ctx->file_table, i); } /* default it to the whole table */ io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr); return 0; fail: io_sqe_files_unregister(ctx); return ret; } int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) { if (!ctx->buf_table.nr) return -ENXIO; io_rsrc_data_free(ctx, &ctx->buf_table); return 0; } /* * Not super efficient, but this is just a registration time. And we do cache * the last compound head, so generally we'll only do a full search if we don't * match that one. * * We check if the given compound head page has already been accounted, to * avoid double accounting it. This allows us to account the full size of the * page, not just the constituent pages of a huge page. */ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, int nr_pages, struct page *hpage) { int i, j; /* check current page array */ for (i = 0; i < nr_pages; i++) { if (!PageCompound(pages[i])) continue; if (compound_head(pages[i]) == hpage) return true; } /* check previously registered pages */ for (i = 0; i < ctx->buf_table.nr; i++) { struct io_rsrc_node *node = ctx->buf_table.nodes[i]; struct io_mapped_ubuf *imu; if (!node) continue; imu = node->buf; for (j = 0; j < imu->nr_bvecs; j++) { if (!PageCompound(imu->bvec[j].bv_page)) continue; if (compound_head(imu->bvec[j].bv_page) == hpage) return true; } } return false; } static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, int nr_pages, struct io_mapped_ubuf *imu, struct page **last_hpage) { int i, ret; imu->acct_pages = 0; for (i = 0; i < nr_pages; i++) { if (!PageCompound(pages[i])) { imu->acct_pages++; } else { struct page *hpage; hpage = compound_head(pages[i]); if (hpage == *last_hpage) continue; *last_hpage = hpage; if (headpage_already_acct(ctx, pages, i, hpage)) continue; imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; } } if (!imu->acct_pages) return 0; ret = io_account_mem(ctx, imu->acct_pages); if (ret) imu->acct_pages = 0; return ret; } static bool io_coalesce_buffer(struct page ***pages, int *nr_pages, struct io_imu_folio_data *data) { struct page **page_array = *pages, **new_array = NULL; int nr_pages_left = *nr_pages, i, j; int nr_folios = data->nr_folios; /* Store head pages only*/ new_array = kvmalloc_array(nr_folios, sizeof(struct page *), GFP_KERNEL); if (!new_array) return false; new_array[0] = compound_head(page_array[0]); /* * The pages are bound to the folio, it doesn't * actually unpin them but drops all but one reference, * which is usually put down by io_buffer_unmap(). * Note, needs a better helper. 
*/ if (data->nr_pages_head > 1) unpin_user_pages(&page_array[1], data->nr_pages_head - 1); j = data->nr_pages_head; nr_pages_left -= data->nr_pages_head; for (i = 1; i < nr_folios; i++) { unsigned int nr_unpin; new_array[i] = page_array[j]; nr_unpin = min_t(unsigned int, nr_pages_left - 1, data->nr_pages_mid - 1); if (nr_unpin) unpin_user_pages(&page_array[j+1], nr_unpin); j += data->nr_pages_mid; nr_pages_left -= data->nr_pages_mid; } kvfree(page_array); *pages = new_array; *nr_pages = nr_folios; return true; } bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, struct io_imu_folio_data *data) { struct folio *folio = page_folio(page_array[0]); unsigned int count = 1, nr_folios = 1; int i; data->nr_pages_mid = folio_nr_pages(folio); data->folio_shift = folio_shift(folio); /* * Check if pages are contiguous inside a folio, and all folios have * the same page count except for the head and tail. */ for (i = 1; i < nr_pages; i++) { if (page_folio(page_array[i]) == folio && page_array[i] == page_array[i-1] + 1) { count++; continue; } if (nr_folios == 1) { if (folio_page_idx(folio, page_array[i-1]) != data->nr_pages_mid - 1) return false; data->nr_pages_head = count; } else if (count != data->nr_pages_mid) { return false; } folio = page_folio(page_array[i]); if (folio_size(folio) != (1UL << data->folio_shift) || folio_page_idx(folio, page_array[i]) != 0) return false; count = 1; nr_folios++; } if (nr_folios == 1) data->nr_pages_head = count; data->nr_folios = nr_folios; return true; } static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, struct page **last_hpage) { struct io_mapped_ubuf *imu = NULL; struct page **pages = NULL; struct io_rsrc_node *node; unsigned long off; size_t size; int ret, nr_pages, i; struct io_imu_folio_data data; bool coalesced = false; if (!iov->iov_base) return NULL; node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!node) return ERR_PTR(-ENOMEM); node->buf = NULL; ret = -ENOMEM; pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len, &nr_pages); if (IS_ERR(pages)) { ret = PTR_ERR(pages); pages = NULL; goto done; } /* If it's huge page(s), try to coalesce them into fewer bvec entries */ if (nr_pages > 1 && io_check_coalesce_buffer(pages, nr_pages, &data)) { if (data.nr_pages_mid != 1) coalesced = io_coalesce_buffer(&pages, &nr_pages, &data); } imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); if (!imu) goto done; ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); if (ret) { unpin_user_pages(pages, nr_pages); goto done; } size = iov->iov_len; /* store original address for later verification */ imu->ubuf = (unsigned long) iov->iov_base; imu->len = iov->iov_len; imu->nr_bvecs = nr_pages; imu->folio_shift = PAGE_SHIFT; if (coalesced) imu->folio_shift = data.folio_shift; refcount_set(&imu->refs, 1); off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1); node->buf = imu; ret = 0; for (i = 0; i < nr_pages; i++) { size_t vec_len; vec_len = min_t(size_t, size, (1UL << imu->folio_shift) - off); bvec_set_page(&imu->bvec[i], pages[i], vec_len, off); off = 0; size -= vec_len; } done: if (ret) { kvfree(imu); if (node) io_put_rsrc_node(ctx, node); node = ERR_PTR(ret); } kvfree(pages); return node; } int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, unsigned int nr_args, u64 __user *tags) { struct page *last_hpage = NULL; struct io_rsrc_data data; struct iovec fast_iov, *iov = &fast_iov; const struct iovec __user *uvec; int i, ret; 
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); if (ctx->buf_table.nr) return -EBUSY; if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) return -EINVAL; ret = io_rsrc_data_alloc(&data, nr_args); if (ret) return ret; if (!arg) memset(iov, 0, sizeof(*iov)); for (i = 0; i < nr_args; i++) { struct io_rsrc_node *node; u64 tag = 0; if (arg) { uvec = (struct iovec __user *) arg; iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); if (IS_ERR(iov)) { ret = PTR_ERR(iov); break; } ret = io_buffer_validate(iov); if (ret) break; if (ctx->compat) arg += sizeof(struct compat_iovec); else arg += sizeof(struct iovec); } if (tags) { if (copy_from_user(&tag, &tags[i], sizeof(tag))) { ret = -EFAULT; break; } } node = io_sqe_buffer_register(ctx, iov, &last_hpage); if (IS_ERR(node)) { ret = PTR_ERR(node); break; } if (tag) { if (!node) { ret = -EINVAL; break; } node->tag = tag; } data.nodes[i] = node; } ctx->buf_table = data; if (ret) io_sqe_buffers_unregister(ctx); return ret; } int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, u64 buf_addr, size_t len) { u64 buf_end; size_t offset; if (WARN_ON_ONCE(!imu)) return -EFAULT; if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) return -EFAULT; /* not inside the mapped region */ if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len))) return -EFAULT; /* * Might not be a start of buffer, set size appropriately * and advance us to the beginning. */ offset = buf_addr - imu->ubuf; iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, len); if (offset) { /* * Don't use iov_iter_advance() here, as it's really slow for * using the latter parts of a big fixed buffer - it iterates * over each segment manually. We can cheat a bit here, because * we know that: * * 1) it's a BVEC iter, we set it up * 2) all bvecs are the same in size, except potentially the * first and last bvec * * So just find our index, and adjust the iterator afterwards. * If the offset is within the first bvec (or the whole first * bvec, just use iov_iter_advance(). This makes it easier * since we can just skip the first segment, which may not * be folio_size aligned. */ const struct bio_vec *bvec = imu->bvec; if (offset < bvec->bv_len) { iter->iov_offset = offset; } else { unsigned long seg_skip; /* skip first vec */ offset -= bvec->bv_len; seg_skip = 1 + (offset >> imu->folio_shift); iter->bvec += seg_skip; iter->nr_segs -= seg_skip; iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); } } return 0; } /* Lock two rings at once. The rings must be different! */ static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2) { if (ctx1 > ctx2) swap(ctx1, ctx2); mutex_lock(&ctx1->uring_lock); mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING); } /* Both rings are locked by the caller. */ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, struct io_uring_clone_buffers *arg) { struct io_rsrc_data data; int i, ret, off, nr; unsigned int nbufs; lockdep_assert_held(&ctx->uring_lock); lockdep_assert_held(&src_ctx->uring_lock); /* * Accounting state is shared between the two rings; that only works if * both rings are accounted towards the same counters. 
*/ if (ctx->user != src_ctx->user || ctx->mm_account != src_ctx->mm_account) return -EINVAL; /* if offsets are given, must have nr specified too */ if (!arg->nr && (arg->dst_off || arg->src_off)) return -EINVAL; /* not allowed unless REPLACE is set */ if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) return -EBUSY; nbufs = src_ctx->buf_table.nr; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) return -EINVAL; else if (arg->nr > IORING_MAX_REG_BUFFERS) return -EINVAL; if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) return -EOVERFLOW; ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); if (ret) return ret; /* Fill entries in data from dst that won't overlap with src */ for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) { struct io_rsrc_node *src_node = ctx->buf_table.nodes[i]; if (src_node) { data.nodes[i] = src_node; src_node->refs++; } } ret = -ENXIO; nbufs = src_ctx->buf_table.nr; if (!nbufs) goto out_free; ret = -EINVAL; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) goto out_free; ret = -EOVERFLOW; if (check_add_overflow(arg->nr, arg->src_off, &off)) goto out_free; if (off > nbufs) goto out_free; off = arg->dst_off; i = arg->src_off; nr = arg->nr; while (nr--) { struct io_rsrc_node *dst_node, *src_node; src_node = io_rsrc_node_lookup(&src_ctx->buf_table, i); if (!src_node) { dst_node = NULL; } else { dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; goto out_free; } refcount_inc(&src_node->buf->refs); dst_node->buf = src_node->buf; } data.nodes[off++] = dst_node; i++; } /* * If asked for replace, put the old table. data->nodes[] holds both * old and new nodes at this point. */ if (arg->flags & IORING_REGISTER_DST_REPLACE) io_rsrc_data_free(ctx, &ctx->buf_table); /* * ctx->buf_table must be empty now - either the contents are being * replaced and we just freed the table, or the contents are being * copied to a ring that does not have buffers yet (checked at function * entry). */ WARN_ON_ONCE(ctx->buf_table.nr); ctx->buf_table = data; return 0; out_free: io_rsrc_data_free(ctx, &data); return ret; } /* * Copy the registered buffers from the source ring whose file descriptor * is given in the src_fd to the current ring. This is identical to registering * the buffers with ctx, except faster as mappings already exist. * * Since the memory is already accounted once, don't account it again. */ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_clone_buffers buf; struct io_ring_ctx *src_ctx; bool registered_src; struct file *file; int ret; if (copy_from_user(&buf, arg, sizeof(buf))) return -EFAULT; if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE)) return -EINVAL; if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr) return -EBUSY; if (memchr_inv(buf.pad, 0, sizeof(buf.pad))) return -EINVAL; registered_src = (buf.flags & IORING_REGISTER_SRC_REGISTERED) != 0; file = io_uring_register_get_file(buf.src_fd, registered_src); if (IS_ERR(file)) return PTR_ERR(file); src_ctx = file->private_data; if (src_ctx != ctx) { mutex_unlock(&ctx->uring_lock); lock_two_rings(ctx, src_ctx); } ret = io_clone_buffers(ctx, src_ctx, &buf); if (src_ctx != ctx) mutex_unlock(&src_ctx->uring_lock); fput(file); return ret; }
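/*
 * Editorial sketch (not io_uring code): io_import_fixed() above skips
 * bvec segments without iov_iter_advance() by relying on every segment
 * except possibly the first covering exactly (1 << folio_shift) bytes.
 * This userspace model reproduces just that segment/offset computation;
 * the model_* names and the example sizes are assumptions for
 * illustration.
 */
#include <stdio.h>

struct model_pos {
	unsigned long seg;	/* index into the bvec array */
	unsigned long off;	/* byte offset inside that segment */
};

/*
 * The registered buffer starts first_len bytes into its first segment;
 * every following segment is exactly (1 << folio_shift) bytes long.
 */
static struct model_pos model_locate(unsigned long offset,
				     unsigned long first_len,
				     unsigned int folio_shift)
{
	struct model_pos pos;

	if (offset < first_len) {
		/* Still inside the (possibly short) first segment. */
		pos.seg = 0;
		pos.off = offset;
	} else {
		/* Skip the first segment, then whole folios. */
		offset -= first_len;
		pos.seg = 1 + (offset >> folio_shift);
		pos.off = offset & ((1UL << folio_shift) - 1);
	}
	return pos;
}

int main(void)
{
	/* Example: 4 KiB folios, first segment only 1 KiB long. */
	unsigned long offsets[] = { 0, 512, 1024, 5120, 9216 };

	for (unsigned int i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		struct model_pos p = model_locate(offsets[i], 1024, 12);

		printf("offset %5lu -> segment %lu, intra-segment offset %lu\n",
		       offsets[i], p.seg, p.off);
	}
	return 0;
}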
7 7 12 2 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 // SPDX-License-Identifier: GPL-2.0-or-later /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> * * Changes: * Fixes: */ #define pr_fmt(fmt) "UDPLite: " fmt #include <linux/export.h> #include <linux/proc_fs.h> #include "udp_impl.h" struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); /* Designate sk as UDP-Lite socket */ static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; } static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static int udplite_err(struct sk_buff *skb, u32 info) { return __udp4_lib_err(skb, info, &udplite_table); } static const struct net_protocol udplite_protocol = { .handler = udplite_rcv, .err_handler = udplite_err, .no_policy = 1, }; struct proto udplite_prot = { .name = "UDP-Lite", .owner = THIS_MODULE, .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udplite_sk_init, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, }; EXPORT_SYMBOL(udplite_prot); static struct inet_protosw udplite4_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, .flags = INET_PROTOSW_PERMANENT, }; #ifdef CONFIG_PROC_FS static struct udp_seq_afinfo udplite4_seq_afinfo = { .family = AF_INET, .udp_table = &udplite_table, }; static int __net_init udplite4_proc_init_net(struct net *net) { if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops, sizeof(struct udp_iter_state), &udplite4_seq_afinfo)) return -ENOMEM; return 0; } static void __net_exit udplite4_proc_exit_net(struct net *net) { remove_proc_entry("udplite", net->proc_net); } static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, }; static __init int udplite4_proc_init(void) { return register_pernet_subsys(&udplite4_net_ops); } #else static inline int udplite4_proc_init(void) { return 0; } #endif void __init udplite4_register(void) { udp_table_init(&udplite_table, "UDP-Lite"); if (proto_register(&udplite_prot, 1)) goto out_register_err; if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) goto out_unregister_proto; inet_register_protosw(&udplite4_protosw); if (udplite4_proc_init()) pr_err("%s: Cannot register /proc!\n", __func__); return; out_unregister_proto: proto_unregister(&udplite_prot); 
out_register_err: pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__); }
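/*
 * Editorial usage sketch: the protocol registered above is reached from
 * userspace via SOCK_DGRAM sockets with IPPROTO_UDPLITE, and RFC 3828
 * partial checksum coverage is configured per socket. The snippet follows
 * the udplite(7) man page; the UDPLITE_*_CSCOV fallback defines are only
 * there in case the libc headers do not supply them, and the address/port
 * values are arbitrary.
 */
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV 10
#endif
#ifndef UDPLITE_RECV_CSCOV
#define UDPLITE_RECV_CSCOV 11
#endif

int main(void)
{
	int cscov = 20;	/* checksum covers only the first 20 payload bytes */
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(5555),
	};
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
	if (fd < 0) {
		perror("socket(IPPROTO_UDPLITE)");
		return 1;
	}
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	/* Per udplite(7), the option level is the protocol number itself. */
	if (setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV,
		       &cscov, sizeof(cscov)) < 0)
		perror("setsockopt(UDPLITE_SEND_CSCOV)");
	if (setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV,
		       &cscov, sizeof(cscov)) < 0)
		perror("setsockopt(UDPLITE_RECV_CSCOV)");

	sendto(fd, "hello udplite", 13, 0,
	       (struct sockaddr *)&dst, sizeof(dst));
	close(fd);
	return 0;
}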
152 11 11 141 144 147 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_DISK_GROUPS_H #define _BCACHEFS_DISK_GROUPS_H #include "disk_groups_types.h" extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) { return groups ? (vstruct_end(&groups->field) - (void *) &groups->entries[0]) / sizeof(struct bch_disk_group) : 0; } struct target { enum { TARGET_NULL, TARGET_DEV, TARGET_GROUP, } type; union { unsigned dev; unsigned group; }; }; #define TARGET_DEV_START 1 #define TARGET_GROUP_START (256 + TARGET_DEV_START) static inline u16 dev_to_target(unsigned dev) { return TARGET_DEV_START + dev; } static inline u16 group_to_target(unsigned group) { return TARGET_GROUP_START + group; } static inline struct target target_decode(unsigned target) { if (target >= TARGET_GROUP_START) return (struct target) { .type = TARGET_GROUP, .group = target - TARGET_GROUP_START }; if (target >= TARGET_DEV_START) return (struct target) { .type = TARGET_DEV, .group = target - TARGET_DEV_START }; return (struct target) { .type = TARGET_NULL }; } const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned); static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c, enum bch_data_type data_type, u16 target) { struct bch_devs_mask devs = c->rw_devs[data_type]; const struct bch_devs_mask *t = bch2_target_to_mask(c, target); if (t) bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX); return devs; } static inline bool bch2_target_accepts_data(struct bch_fs *c, enum bch_data_type data_type, u16 target) { struct bch_devs_mask rw_devs = target_rw_devs(c, data_type, target); return !bitmap_empty(rw_devs.d, BCH_SB_MEMBERS_MAX); } bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); int bch2_disk_path_find(struct bch_sb_handle *, const char *); /* Exported for userspace bcachefs-tools: */ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned); void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned); void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); #define bch2_opt_target (struct bch_opt_fn) { \ .parse = bch2_opt_target_parse, \ .to_text = bch2_opt_target_to_text, \ } int bch2_sb_disk_groups_to_cpu(struct bch_fs *); int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); const char *bch2_sb_validate_disk_groups(struct bch_sb *, struct bch_sb_field *); void bch2_disk_groups_to_text(struct printbuf *, struct bch_fs *); #endif /* _BCACHEFS_DISK_GROUPS_H */
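/*
 * Editorial sketch mirroring the helpers above: a "target" packs "none",
 * "specific device" and "disk group" into one small integer namespace
 * (0 is none, devices start at 1, groups at 257). The standalone program
 * below re-derives the round trip with model_* names so it can be
 * compiled outside the kernel.
 */
#include <assert.h>
#include <stdio.h>

#define MODEL_TARGET_DEV_START		1u
#define MODEL_TARGET_GROUP_START	(256u + MODEL_TARGET_DEV_START)

enum model_target_type { MODEL_NULL, MODEL_DEV, MODEL_GROUP };

struct model_target {
	enum model_target_type	type;
	unsigned		idx;	/* device or group index */
};

static unsigned short model_dev_to_target(unsigned dev)
{
	return MODEL_TARGET_DEV_START + dev;
}

static unsigned short model_group_to_target(unsigned group)
{
	return MODEL_TARGET_GROUP_START + group;
}

static struct model_target model_target_decode(unsigned target)
{
	if (target >= MODEL_TARGET_GROUP_START)
		return (struct model_target) { MODEL_GROUP, target - MODEL_TARGET_GROUP_START };
	if (target >= MODEL_TARGET_DEV_START)
		return (struct model_target) { MODEL_DEV, target - MODEL_TARGET_DEV_START };
	return (struct model_target) { MODEL_NULL, 0 };
}

int main(void)
{
	struct model_target t;

	t = model_target_decode(model_dev_to_target(3));
	assert(t.type == MODEL_DEV && t.idx == 3);

	t = model_target_decode(model_group_to_target(7));
	assert(t.type == MODEL_GROUP && t.idx == 7);

	t = model_target_decode(0);
	assert(t.type == MODEL_NULL);

	printf("encodings: none=0, dev3=%u, group7=%u\n",
	       model_dev_to_target(3), model_group_to_target(7));
	return 0;
}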
46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_HUGETLB_H #define _ASM_GENERIC_HUGETLB_H #include <linux/swap.h> #include <linux/swapops.h> static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) { return mk_pte(page, pgprot); } static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); } static inline unsigned long huge_pte_dirty(pte_t pte) { return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { return pte_mkwrite_novma(pte); } #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT static inline pte_t huge_pte_wrprotect(pte_t pte) { return pte_wrprotect(pte); } #endif static inline pte_t huge_pte_mkdirty(pte_t pte) { return pte_mkdirty(pte); } static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { return pte_modify(pte, newprot); } #ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) { return pte_clear_uffd_wp(pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP static inline int huge_pte_uffd_wp(pte_t pte) { return pte_uffd_wp(pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { pte_clear(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { free_pgd_range(tlb, addr, end, floor, ceiling); } #endif #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { set_pte_at(mm, addr, ptep, pte); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { return ptep_get_and_clear(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return ptep_clear_flush(vma, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTE_NONE static inline int huge_pte_none(pte_t pte) { return pte_none(pte); } #endif /* Please refer to comments above pte_none_mostly() for the usage */ #ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { return huge_pte_none(pte) || is_pte_marker(pte); } #endif #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { return 0; } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep_set_wrprotect(mm, addr, ptep); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS static inline int 
huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { return ptep_set_access_flags(vma, addr, ptep, pte, dirty); } #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { return ptep_get(ptep); } #endif #ifndef __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED static inline bool gigantic_page_runtime_supported(void) { return IS_ENABLED(CONFIG_ARCH_HAS_GIGANTIC_PAGE); } #endif /* __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED */ #endif /* _ASM_GENERIC_HUGETLB_H */
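/*
 * Editorial sketch (made-up names, not kernel code): every helper in the
 * header above follows the same pattern, a trivial generic fallback that
 * is compiled only when the architecture does not claim the helper via a
 * __HAVE_ARCH_* guard. The model below shows the mechanism with a pretend
 * MODEL_HAVE_ARCH_* guard: defining it swaps in the "arch" version while
 * the other helper keeps its generic fallback.
 */
#include <stdio.h>

/* Pretend this came from an architecture header overriding one helper.
 * Comment the #define out to fall back to the generic version. */
#define MODEL_HAVE_ARCH_WIDGET_CLEAR
#ifdef MODEL_HAVE_ARCH_WIDGET_CLEAR
static inline void model_widget_clear(int *w)
{
	/* arch-specific variant: also poison the value */
	*w = -1;
}
#endif

/* "Generic header": each helper exists only if the arch did not claim it. */
#ifndef MODEL_HAVE_ARCH_WIDGET_CLEAR
static inline void model_widget_clear(int *w)
{
	*w = 0;
}
#endif

#ifndef MODEL_HAVE_ARCH_WIDGET_SET
static inline void model_widget_set(int *w, int v)
{
	*w = v;
}
#endif

int main(void)
{
	int w;

	model_widget_set(&w, 42);	/* generic fallback */
	model_widget_clear(&w);		/* arch override in this build */
	printf("widget after clear: %d\n", w);
	return 0;
}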
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ #include <linux/skbuff.h> #include <linux/skbuff_ref.h> #include <linux/workqueue.h> #include <net/strparser.h> #include <net/tcp.h> #include <net/sock.h> #include <net/tls.h> #include "tls.h" static struct workqueue_struct *tls_strp_wq; static void tls_strp_abort_strp(struct tls_strparser *strp, int err) { if (strp->stopped) return; strp->stopped = 1; /* Report an error on the lower socket */ WRITE_ONCE(strp->sk->sk_err, -err); /* Paired with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(strp->sk); } static void tls_strp_anchor_free(struct tls_strparser *strp) { struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1); if (!strp->copy_mode) shinfo->frag_list = NULL; consume_skb(strp->anchor); strp->anchor = NULL; } static struct sk_buff * tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb, int offset, int len) { struct sk_buff *skb; int
i, err; skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER, &err, strp->sk->sk_allocation); if (!skb) return NULL; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON_ONCE(skb_copy_bits(in_skb, offset, skb_frag_address(frag), skb_frag_size(frag))); offset += skb_frag_size(frag); } skb->len = len; skb->data_len = len; skb_copy_header(skb, in_skb); return skb; } /* Create a new skb with the contents of input copied to its page frags */ static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp) { struct strp_msg *rxm; struct sk_buff *skb; skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset, strp->stm.full_len); if (!skb) return NULL; rxm = strp_msg(skb); rxm->offset = 0; return skb; } /* Steal the input skb, input msg is invalid after calling this function */ struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx) { struct tls_strparser *strp = &ctx->strp; #ifdef CONFIG_TLS_DEVICE DEBUG_NET_WARN_ON_ONCE(!strp->anchor->decrypted); #else /* This function turns an input into an output, * that can only happen if we have offload. */ WARN_ON(1); #endif if (strp->copy_mode) { struct sk_buff *skb; /* Replace anchor with an empty skb, this is a little * dangerous but __tls_cur_msg() warns on empty skbs * so hopefully we'll catch abuses. */ skb = alloc_skb(0, strp->sk->sk_allocation); if (!skb) return NULL; swap(strp->anchor, skb); return skb; } return tls_strp_msg_make_copy(strp); } /* Force the input skb to be in copy mode. The data ownership remains * with the input skb itself (meaning unpause will wipe it) but it can * be modified. */ int tls_strp_msg_cow(struct tls_sw_context_rx *ctx) { struct tls_strparser *strp = &ctx->strp; struct sk_buff *skb; if (strp->copy_mode) return 0; skb = tls_strp_msg_make_copy(strp); if (!skb) return -ENOMEM; tls_strp_anchor_free(strp); strp->anchor = skb; tcp_read_done(strp->sk, strp->stm.full_len); strp->copy_mode = 1; return 0; } /* Make a clone (in the skb sense) of the input msg to keep a reference * to the underlying data. The reference-holding skbs get placed on * @dst. 
*/ int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst) { struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); if (strp->copy_mode) { struct sk_buff *skb; WARN_ON_ONCE(!shinfo->nr_frags); /* We can't skb_clone() the anchor, it gets wiped by unpause */ skb = alloc_skb(0, strp->sk->sk_allocation); if (!skb) return -ENOMEM; __skb_queue_tail(dst, strp->anchor); strp->anchor = skb; } else { struct sk_buff *iter, *clone; int chunk, len, offset; offset = strp->stm.offset; len = strp->stm.full_len; iter = shinfo->frag_list; while (len > 0) { if (iter->len <= offset) { offset -= iter->len; goto next; } chunk = iter->len - offset; offset = 0; clone = skb_clone(iter, strp->sk->sk_allocation); if (!clone) return -ENOMEM; __skb_queue_tail(dst, clone); len -= chunk; next: iter = iter->next; } } return 0; } static void tls_strp_flush_anchor_copy(struct tls_strparser *strp) { struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); int i; DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1); for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], false); shinfo->nr_frags = 0; if (strp->copy_mode) { kfree_skb_list(shinfo->frag_list); shinfo->frag_list = NULL; } strp->copy_mode = 0; strp->mixed_decrypted = 0; } static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb, struct sk_buff *in_skb, unsigned int offset, size_t in_len) { size_t len, chunk; skb_frag_t *frag; int sz; frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE]; len = in_len; /* First make sure we got the header */ if (!strp->stm.full_len) { /* Assume one page is more than enough for headers */ chunk = min_t(size_t, len, PAGE_SIZE - skb_frag_size(frag)); WARN_ON_ONCE(skb_copy_bits(in_skb, offset, skb_frag_address(frag) + skb_frag_size(frag), chunk)); skb->len += chunk; skb->data_len += chunk; skb_frag_size_add(frag, chunk); sz = tls_rx_msg_size(strp, skb); if (sz < 0) return sz; /* We may have over-read, sz == 0 is guaranteed under-read */ if (unlikely(sz && sz < skb->len)) { int over = skb->len - sz; WARN_ON_ONCE(over > chunk); skb->len -= over; skb->data_len -= over; skb_frag_size_add(frag, -over); chunk -= over; } frag++; len -= chunk; offset += chunk; strp->stm.full_len = sz; if (!strp->stm.full_len) goto read_done; } /* Load up more data */ while (len && strp->stm.full_len > skb->len) { chunk = min_t(size_t, len, strp->stm.full_len - skb->len); chunk = min_t(size_t, chunk, PAGE_SIZE - skb_frag_size(frag)); WARN_ON_ONCE(skb_copy_bits(in_skb, offset, skb_frag_address(frag) + skb_frag_size(frag), chunk)); skb->len += chunk; skb->data_len += chunk; skb_frag_size_add(frag, chunk); frag++; len -= chunk; offset += chunk; } read_done: return in_len - len; } static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb, struct sk_buff *in_skb, unsigned int offset, size_t in_len) { struct sk_buff *nskb, *first, *last; struct skb_shared_info *shinfo; size_t chunk; int sz; if (strp->stm.full_len) chunk = strp->stm.full_len - skb->len; else chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; chunk = min(chunk, in_len); nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk); if (!nskb) return -ENOMEM; shinfo = skb_shinfo(skb); if (!shinfo->frag_list) { shinfo->frag_list = nskb; nskb->prev = nskb; } else { first = shinfo->frag_list; last = first->prev; last->next = nskb; first->prev = nskb; } skb->len += chunk; skb->data_len += chunk; if (!strp->stm.full_len) { sz = tls_rx_msg_size(strp, skb); if (sz < 0) return sz; /* We may have over-read, sz == 0 is guaranteed 
under-read */ if (unlikely(sz && sz < skb->len)) { int over = skb->len - sz; WARN_ON_ONCE(over > chunk); skb->len -= over; skb->data_len -= over; __pskb_trim(nskb, nskb->len - over); chunk -= over; } strp->stm.full_len = sz; } return chunk; } static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, unsigned int offset, size_t in_len) { struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data; struct sk_buff *skb; int ret; if (strp->msg_ready) return 0; skb = strp->anchor; if (!skb->len) skb_copy_decrypted(skb, in_skb); else strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb); if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted) ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len); else ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len); if (ret < 0) { desc->error = ret; ret = 0; } if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); } return ret; } static int tls_strp_read_copyin(struct tls_strparser *strp) { read_descriptor_t desc; desc.arg.data = strp; desc.error = 0; desc.count = 1; /* give more than one skb per call */ /* sk should be locked here, so okay to do read_sock */ tcp_read_sock(strp->sk, &desc, tls_strp_copyin); return desc.error; } static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) { struct skb_shared_info *shinfo; struct page *page; int need_spc, len; /* If the rbuf is small or rcv window has collapsed to 0 we need * to read the data out. Otherwise the connection will stall. * Without pressure threshold of INT_MAX will never be ready. */ if (likely(qshort && !tcp_epollin_ready(strp->sk, INT_MAX))) return 0; shinfo = skb_shinfo(strp->anchor); shinfo->frag_list = NULL; /* If we don't know the length go max plus page for cipher overhead */ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; for (len = need_spc; len > 0; len -= PAGE_SIZE) { page = alloc_page(strp->sk->sk_allocation); if (!page) { tls_strp_flush_anchor_copy(strp); return -ENOMEM; } skb_fill_page_desc(strp->anchor, shinfo->nr_frags++, page, 0, 0); } strp->copy_mode = 1; strp->stm.offset = 0; strp->anchor->len = 0; strp->anchor->data_len = 0; strp->anchor->truesize = round_up(need_spc, PAGE_SIZE); tls_strp_read_copyin(strp); return 0; } static bool tls_strp_check_queue_ok(struct tls_strparser *strp) { unsigned int len = strp->stm.offset + strp->stm.full_len; struct sk_buff *first, *skb; u32 seq; first = skb_shinfo(strp->anchor)->frag_list; skb = first; seq = TCP_SKB_CB(first)->seq; /* Make sure there's no duplicate data in the queue, * and the decrypted status matches. 
*/ while (skb->len < len) { seq += skb->len; len -= skb->len; skb = skb->next; if (TCP_SKB_CB(skb)->seq != seq) return false; if (skb_cmp_decrypted(first, skb)) return false; } return true; } static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) { struct tcp_sock *tp = tcp_sk(strp->sk); struct sk_buff *first; u32 offset; first = tcp_recv_skb(strp->sk, tp->copied_seq, &offset); if (WARN_ON_ONCE(!first)) return; /* Bestow the state onto the anchor */ strp->anchor->len = offset + len; strp->anchor->data_len = offset + len; strp->anchor->truesize = offset + len; skb_shinfo(strp->anchor)->frag_list = first; skb_copy_header(strp->anchor, first); strp->anchor->destructor = NULL; strp->stm.offset = offset; } void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; DEBUG_NET_WARN_ON_ONCE(!strp->msg_ready); DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) return; tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } rxm = strp_msg(strp->anchor); rxm->full_len = strp->stm.full_len; rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; } /* Called with lock held on lower socket */ static int tls_strp_read_sock(struct tls_strparser *strp) { int sz, inq; inq = tcp_inq(strp->sk); if (inq < 1) return 0; if (unlikely(strp->copy_mode)) return tls_strp_read_copyin(strp); if (inq < strp->stm.full_len) return tls_strp_read_copy(strp, true); if (!strp->stm.full_len) { tls_strp_load_anchor_with_queue(strp, inq); sz = tls_rx_msg_size(strp, strp->anchor); if (sz < 0) { tls_strp_abort_strp(strp, sz); return sz; } strp->stm.full_len = sz; if (!strp->stm.full_len || inq < strp->stm.full_len) return tls_strp_read_copy(strp, true); } if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; } void tls_strp_check_rcv(struct tls_strparser *strp) { if (unlikely(strp->stopped) || strp->msg_ready) return; if (tls_strp_read_sock(strp) == -ENOMEM) queue_work(tls_strp_wq, &strp->work); } /* Lower sock lock held */ void tls_strp_data_ready(struct tls_strparser *strp) { /* This check is needed to synchronize with do_tls_strp_work. * do_tls_strp_work acquires a process lock (lock_sock) whereas * the lock held here is bh_lock_sock. The two locks can be * held by different threads at the same time, but bh_lock_sock * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. 
*/ if (sock_owned_by_user_nocheck(strp->sk)) { queue_work(tls_strp_wq, &strp->work); return; } tls_strp_check_rcv(strp); } static void tls_strp_work(struct work_struct *w) { struct tls_strparser *strp = container_of(w, struct tls_strparser, work); lock_sock(strp->sk); tls_strp_check_rcv(strp); release_sock(strp->sk); } void tls_strp_msg_done(struct tls_strparser *strp) { WARN_ON(!strp->stm.full_len); if (likely(!strp->copy_mode)) tcp_read_done(strp->sk, strp->stm.full_len); else tls_strp_flush_anchor_copy(strp); WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); } void tls_strp_stop(struct tls_strparser *strp) { strp->stopped = 1; } int tls_strp_init(struct tls_strparser *strp, struct sock *sk) { memset(strp, 0, sizeof(*strp)); strp->sk = sk; strp->anchor = alloc_skb(0, GFP_KERNEL); if (!strp->anchor) return -ENOMEM; INIT_WORK(&strp->work, tls_strp_work); return 0; } /* strp must already be stopped so that tls_strp_recv will no longer be called. * Note that tls_strp_done is not called with the lower socket held. */ void tls_strp_done(struct tls_strparser *strp) { WARN_ON(!strp->stopped); cancel_work_sync(&strp->work); tls_strp_anchor_free(strp); } int __init tls_strp_dev_init(void) { tls_strp_wq = create_workqueue("tls-strp"); if (unlikely(!tls_strp_wq)) return -ENOMEM; return 0; } void tls_strp_dev_exit(void) { destroy_workqueue(tls_strp_wq); }
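The strparser above only marks a record ready once strp->stm.full_len bytes have accumulated; full_len itself comes from tls_rx_msg_size(), which is not shown in this excerpt and parses the 5-byte TLS record header. Below is a minimal userspace sketch of that length calculation, assuming the standard TLS record layout; tls_record_full_len() and the constants are illustrative names, not kernel symbols.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define TLS_RECORD_HDR_LEN 5          /* type(1) + version(2) + length(2) */
#define TLS_MAX_PLAINTEXT  (1 << 14)  /* 16 KiB payload cap per record */

/* Return total record size (header + payload), 0 if more bytes are needed
 * to see the header, or -1 if the length field is implausible.  This is
 * the kind of value the strparser keeps in strp->stm.full_len. */
static int tls_record_full_len(const uint8_t *buf, size_t avail)
{
	uint16_t payload;

	if (avail < TLS_RECORD_HDR_LEN)
		return 0;                            /* header incomplete */

	payload = ((uint16_t)buf[3] << 8) | buf[4];  /* big-endian length */
	if (payload > TLS_MAX_PLAINTEXT + 2048)      /* cipher-overhead bound (assumed) */
		return -1;

	return TLS_RECORD_HDR_LEN + payload;
}

int main(void)
{
	/* application_data (0x17), TLS 1.2 on the wire, 19-byte payload */
	const uint8_t hdr[] = { 0x17, 0x03, 0x03, 0x00, 0x13 };

	printf("full record length: %d bytes\n",
	       tls_record_full_len(hdr, sizeof(hdr)));   /* prints 24 */
	return 0;
}

Once that length is known, the functions above simply accumulate bytes into the anchor skb (or its copy-mode page frags) until that many bytes are queued, then signal tls_rx_msg_ready().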
/* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> #include <rdma/ib.h> #include <rdma/uverbs_std_types.h> #include <rdma/rdma_netlink.h> #include "uverbs.h" #include "core_priv.h" #include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS, IB_UVERBS_NUM_FIXED_MINOR = 32, IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR, }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static dev_t dynamic_uverbs_dev; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); static struct ib_client uverbs_client; static char *uverbs_devnode(const struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static const struct class uverbs_class = { .name = "infiniband_verbs", .devnode = uverbs_devnode, }; /* * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. */ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) { /* * We do not hold the hw_destroy_rwsem lock for this flow, instead * srcu is used. It does not matter if someone races this with * get_context, we get NULL or valid ucontext. 
*/ struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext); if (!srcu_dereference(ufile->device->ib_dev, &ufile->device->disassociate_srcu)) return ERR_PTR(-EIO); if (!ucontext) return ERR_PTR(-EINVAL); return ucontext; } EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; ret = mw->device->ops.dealloc_mw(mw); if (ret) return ret; atomic_dec(&pd->usecnt); kfree(mw); return ret; } static void ib_uverbs_release_dev(struct device *device) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); mutex_destroy(&dev->lists_mutex); mutex_destroy(&dev->xrcd_tree_mutex); kfree(dev); } void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->ev_queue.lock); uverbs_uobject_put(&ev_file->uobj); } ib_uverbs_release_uevent(&uobj->uevent); } void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { struct ib_uverbs_async_event_file *async_file = uobj->event_file; struct ib_uverbs_event *evt, *tmp; if (!async_file) return; spin_lock_irq(&async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&async_file->ev_queue.lock); uverbs_uobject_put(&async_file->uobj); } void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); } void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); struct ib_device *ib_dev; int srcu_key; release_ufile_idr_uobject(file); srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->ops.disassociate_ucontext) module_put(ib_dev->ops.owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (refcount_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); if (file->default_async_file) uverbs_uobject_put(&file->default_async_file->uobj); put_device(&file->device->dev); if (file->disassociate_page) __free_pages(file->disassociate_page, 0); mutex_destroy(&file->disassociation_lock); mutex_destroy(&file->umap_lock); mutex_destroy(&file->ucontext_lock); kfree(file); } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) { struct ib_uverbs_event *event; int ret = 0; spin_lock_irq(&ev_queue->lock); while (list_empty(&ev_queue->event_list)) { if (ev_queue->is_closed) { spin_unlock_irq(&ev_queue->lock); return -EIO; } spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || ev_queue->is_closed))) return -ERESTARTSYS; spin_lock_irq(&ev_queue->lock); } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); if (eventsz > count) { 
ret = -EINVAL; event = NULL; } else { list_del(ev_queue->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&ev_queue->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_async_event_file *file = filp->private_data; return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_comp_event_desc)); } static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, struct file *filp, struct poll_table_struct *wait) { __poll_t pollflags = 0; poll_wait(filp, &ev_queue->poll_wait, wait); spin_lock_irq(&ev_queue->lock); if (!list_empty(&ev_queue->event_list)) pollflags = EPOLLIN | EPOLLRDNORM; else if (ev_queue->is_closed) pollflags = EPOLLERR; spin_unlock_irq(&ev_queue->lock); return pollflags; } static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_uverbs_async_event_file *file = filp->private_data; return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); } static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_async_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); } const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_comp_event_read, .poll = ib_uverbs_comp_event_poll, .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, }; const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_async_event_read, .poll = ib_uverbs_async_event_poll, .release = uverbs_async_event_release, .fasync = ib_uverbs_async_event_fasync, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_queue *ev_queue = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!ev_queue) return; spin_lock_irqsave(&ev_queue->lock, flags); if (ev_queue->is_closed) { spin_unlock_irqrestore(&ev_queue->lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&ev_queue->lock, flags); return; } uobj = cq->uobject; entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &ev_queue->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&ev_queue->lock, flags); wake_up_interruptible(&ev_queue->poll_wait); kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } void ib_uverbs_async_handler(struct 
ib_uverbs_async_event_file *async_file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; if (!async_file) return; spin_lock_irqsave(&async_file->ev_queue.lock, flags); if (async_file->ev_queue.is_closed) { spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); wake_up_interruptible(&async_file->ev_queue.poll_wait); kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN); } static void uverbs_uobj_event(struct ib_uevent_object *eobj, struct ib_event *event) { ib_uverbs_async_handler(eobj->event_file, eobj->uobject.user_handle, event->event, &eobj->event_list, &eobj->events_reported); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.cq->uobject->uevent, event); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject) return; uverbs_uobj_event(&event->element.qp->uobject->uevent, event); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.wq->uobject->uevent, event); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { uverbs_uobj_event(&event->element.srq->uobject->uevent, event); } static void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { ib_uverbs_async_handler( container_of(handler, struct ib_uverbs_async_event_file, event_handler), event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) { spin_lock_init(&ev_queue->lock); INIT_LIST_HEAD(&ev_queue->event_list); init_waitqueue_head(&ev_queue->poll_wait); ev_queue->is_closed = 0; ev_queue->async_queue = NULL; } void ib_uverbs_init_async_event_file( struct ib_uverbs_async_event_file *async_file) { struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile; struct ib_device *ib_dev = async_file->uobj.context->device; ib_uverbs_init_event_queue(&async_file->ev_queue); /* The first async_event_file becomes the default one for the file. 
*/ mutex_lock(&uverbs_file->ucontext_lock); if (!uverbs_file->default_async_file) { /* Pairs with the put in ib_uverbs_release_file */ uverbs_uobject_get(&async_file->uobj); smp_store_release(&uverbs_file->default_async_file, async_file); } mutex_unlock(&uverbs_file->ucontext_lock); INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev, ib_uverbs_event_handler); ib_register_event_handler(&async_file->event_handler); } static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, const struct uverbs_api_write_method *method_elm) { if (method_elm->is_ex) { count -= sizeof(*hdr) + sizeof(*ex_hdr); if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) return -EINVAL; if (hdr->in_words * 8 < method_elm->req_size) return -ENOSPC; if (ex_hdr->cmd_hdr_reserved) return -EINVAL; if (ex_hdr->response) { if (!hdr->out_words && !ex_hdr->provider_out_words) return -EINVAL; if (hdr->out_words * 8 < method_elm->resp_size) return -ENOSPC; if (!access_ok(u64_to_user_ptr(ex_hdr->response), (hdr->out_words + ex_hdr->provider_out_words) * 8)) return -EFAULT; } else { if (hdr->out_words || ex_hdr->provider_out_words) return -EINVAL; } return 0; } /* not extended command */ if (hdr->in_words * 4 != count) return -EINVAL; if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't * check the size so they allowed this. Now that the size is * checked provide a compatibility work around to not break * those userspaces. */ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && count == 16) { hdr->in_words = 6; return 0; } return -ENOSPC; } if (hdr->out_words * 4 < method_elm->resp_size) return -ENOSPC; return 0; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; const struct uverbs_api_write_method *method_elm; struct uverbs_api *uapi = file->device->uapi; struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_uverbs_cmd_hdr hdr; struct uverbs_attr_bundle bundle; int srcu_key; ssize_t ret; if (!ib_safe_file_access(filp)) { pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", task_tgid_vnr(current), current->comm); return -EACCES; } if (count < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; method_elm = uapi_get_method(uapi, hdr.command); if (IS_ERR(method_elm)) return PTR_ERR(method_elm); if (method_elm->is_ex) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; } ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); if (ret) return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); buf += sizeof(hdr); memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; bundle.context = NULL; /* only valid if bundle has uobject */ bundle.uobject = NULL; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); size_t out_len = hdr.out_words * 4; u64 response = 0; if (method_elm->has_udata) { bundle.driver_udata.inlen = in_len - method_elm->req_size; in_len = method_elm->req_size; if (bundle.driver_udata.inlen) bundle.driver_udata.inbuf = buf + in_len; else bundle.driver_udata.inbuf = NULL; } else { memset(&bundle.driver_udata, 0, sizeof(bundle.driver_udata)); } if (method_elm->has_resp) { /* * The macros check that if 
has_resp is set * then the command request structure starts * with a '__aligned u64 response' member. */ ret = get_user(response, (const u64 __user *)buf); if (ret) goto out_unlock; if (method_elm->has_udata) { bundle.driver_udata.outlen = out_len - method_elm->resp_size; out_len = method_elm->resp_size; if (bundle.driver_udata.outlen) bundle.driver_udata.outbuf = u64_to_user_ptr(response + out_len); else bundle.driver_udata.outbuf = NULL; } } else { bundle.driver_udata.outlen = 0; bundle.driver_udata.outbuf = NULL; } ib_uverbs_init_udata_buf_or_null( &bundle.ucore, buf, u64_to_user_ptr(response), in_len, out_len); } else { buf += sizeof(ex_hdr); ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, u64_to_user_ptr(ex_hdr.response), hdr.in_words * 8, hdr.out_words * 8); ib_uverbs_init_udata_buf_or_null( &bundle.driver_udata, buf + bundle.ucore.inlen, u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); } ret = method_elm->handler(&bundle); if (bundle.uobject) uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true, !ret, &bundle); out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return (ret) ? : count; } static const struct vm_operations_struct rdma_umap_ops; static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; struct ib_ucontext *ucontext; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ucontext = ib_uverbs_get_ucontext_file(file); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto out; } mutex_lock(&file->disassociation_lock); vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); mutex_unlock(&file->disassociation_lock); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } /* * The VMA has been dup'd, initialize the vm_private_data with a new tracking * struct */ static void rdma_umap_open(struct vm_area_struct *vma) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; struct rdma_umap_priv *opriv = vma->vm_private_data; struct rdma_umap_priv *priv; if (!opriv) return; /* We are racing with disassociation */ if (!down_read_trylock(&ufile->hw_destroy_rwsem)) goto out_zap; mutex_lock(&ufile->disassociation_lock); /* * Disassociation already completed, the VMA should already be zapped. */ if (!ufile->ucontext) goto out_unlock; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; rdma_umap_priv_init(priv, vma, opriv->entry); mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); return; out_unlock: mutex_unlock(&ufile->disassociation_lock); up_read(&ufile->hw_destroy_rwsem); out_zap: /* * We can't allow the VMA to be created with the actual IO pages, that * would break our API contract, and it can't be stopped at this * point, so zap it. */ vma->vm_private_data = NULL; zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); } static void rdma_umap_close(struct vm_area_struct *vma) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; struct rdma_umap_priv *priv = vma->vm_private_data; if (!priv) return; /* * The vma holds a reference on the struct file that created it, which * in turn means that the ib_uverbs_file is guaranteed to exist at * this point. 
*/ mutex_lock(&ufile->umap_lock); if (priv->entry) rdma_user_mmap_entry_put(priv->entry); list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); } /* * Once the zap_vma_ptes has been called touches to the VMA will come here and * we return a dummy writable zero page for all the pfns. */ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) { struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; struct rdma_umap_priv *priv = vmf->vma->vm_private_data; vm_fault_t ret = 0; if (!priv) return VM_FAULT_SIGBUS; /* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { vmf->page = ZERO_PAGE(vmf->address); get_page(vmf->page); return 0; } mutex_lock(&ufile->umap_lock); if (!ufile->disassociate_page) ufile->disassociate_page = alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); if (ufile->disassociate_page) { /* * This VMA is forced to always be shared so this doesn't have * to worry about COW. */ vmf->page = ufile->disassociate_page; get_page(vmf->page); } else { ret = VM_FAULT_SIGBUS; } mutex_unlock(&ufile->umap_lock); return ret; } static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, .fault = rdma_umap_fault, }; void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; mutex_lock(&ufile->disassociation_lock); while (1) { struct mm_struct *mm = NULL; /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); while (!list_empty(&ufile->umaps)) { int ret; priv = list_first_entry(&ufile->umaps, struct rdma_umap_priv, list); mm = priv->vma->vm_mm; ret = mmget_not_zero(mm); if (!ret) { list_del_init(&priv->list); if (priv->entry) { rdma_user_mmap_entry_put(priv->entry); priv->entry = NULL; } mm = NULL; continue; } break; } mutex_unlock(&ufile->umap_lock); if (!mm) { mutex_unlock(&ufile->disassociation_lock); return; } /* * The umap_lock is nested under mmap_lock since it used within * the vma_ops callbacks, so we have to clean the list one mm * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ mmap_read_lock(mm); mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { struct vm_area_struct *vma = priv->vma; if (vma->vm_mm != mm) continue; list_del_init(&priv->list); zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); if (priv->entry) { rdma_user_mmap_entry_put(priv->entry); priv->entry = NULL; } } mutex_unlock(&ufile->umap_lock); mmap_read_unlock(mm); mmput(mm); } mutex_unlock(&ufile->disassociation_lock); } /** * rdma_user_mmap_disassociate() - Revoke mmaps for a device * @device: device to revoke * * This function should be called by drivers that need to disable mmaps for the * device, for instance because it is going to be reset. 
*/ void rdma_user_mmap_disassociate(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); struct ib_uverbs_file *ufile; mutex_lock(&uverbs_dev->lists_mutex); list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) { if (ufile->ucontext) uverbs_user_mmap_disassociate(ufile); } mutex_unlock(&uverbs_dev->lists_mutex); } EXPORT_SYMBOL(rdma_user_mmap_disassociate); /* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; struct ib_device *ib_dev; int ret; int module_dependent; int srcu_key; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); if (!refcount_inc_not_zero(&dev->refcount)) return -ENXIO; get_device(&dev->dev); srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto err; } if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) { ret = -EPERM; goto err; } /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->ops.owner)) { ret = -ENODEV; goto err; } } file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; if (module_dependent) goto err_module; goto err; } file->device = dev; kref_init(&file->ref); mutex_init(&file->ucontext_lock); spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); init_rwsem(&file->hw_destroy_rwsem); mutex_init(&file->umap_lock); INIT_LIST_HEAD(&file->umaps); mutex_init(&file->disassociation_lock); filp->private_data = file; list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); setup_ufile_idr_uobject(file); return stream_open(inode, filp); err_module: module_put(ib_dev->ops.owner); err: mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); if (refcount_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); put_device(&dev->dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); kref_put(&file->ref, ib_uverbs_release_file); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, .unlocked_ioctl = ib_uverbs_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { struct ib_uverbs_device 
*uverbs_dev = client_data; int ret; if (res->port != -1) return -EINVAL; res->abi = ibdev->ops.uverbs_abi_ver; res->cdev = &uverbs_dev->dev; /* * To support DRIVER_ID binding in userspace some of the driver need * upgrading to expose their PCI dependent revision information * through get_context instead of relying on modalias matching. When * the drivers are fixed they can drop this flag. */ if (!ibdev->ops.uverbs_no_driver_id_binding) { ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, ibdev->ops.driver_id); if (ret) return ret; } return 0; } static struct ib_client uverbs_client = { .name = "uverbs", .no_kverbs_req = true, .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one, .get_nl_info = ib_uverbs_get_nl_info, }; MODULE_ALIAS_RDMA_CLIENT("uverbs"); static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR_RO(ibdev); static ssize_t abi_version_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR_RO(abi_version); static struct attribute *ib_dev_attrs[] = { &dev_attr_abi_version.attr, &dev_attr_ibdev.attr, NULL, }; static const struct attribute_group dev_attr_group = { .attrs = ib_dev_attrs, }; static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); static int ib_uverbs_create_uapi(struct ib_device *device, struct ib_uverbs_device *uverbs_dev) { struct uverbs_api *uapi; uapi = uverbs_alloc_api(device); if (IS_ERR(uapi)) return PTR_ERR(uapi); uverbs_dev->uapi = uapi; return 0; } static int ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; int ret; if (!device->ops.alloc_ucontext || device->type == RDMA_DEVICE_TYPE_SMI) return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) return -ENOMEM; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); return -ENOMEM; } device_initialize(&uverbs_dev->dev); uverbs_dev->dev.class = &uverbs_class; uverbs_dev->dev.parent = device->dev.parent; uverbs_dev->dev.release = ib_uverbs_release_dev; uverbs_dev->groups[0] = &dev_attr_group; uverbs_dev->dev.groups = uverbs_dev->groups; refcount_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, GFP_KERNEL); if (devnum < 0) { ret = -ENOMEM; goto err; } uverbs_dev->devnum = devnum; if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) base = 
dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; else base = IB_UVERBS_BASE_DEV + devnum; ret = ib_uverbs_create_uapi(device, uverbs_dev); if (ret) goto err_uapi; uverbs_dev->dev.devt = base; dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); cdev_init(&uverbs_dev->cdev, device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); if (ret) goto err_uapi; ib_set_client_data(device, &uverbs_client, uverbs_dev); return 0; err_uapi: ida_free(&uverbs_ida, devnum); err: if (refcount_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); return ret; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; /* Pending running commands to terminate */ uverbs_disassociate_api_pre(uverbs_dev); mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); list_del_init(&file->list); kref_get(&file->ref); /* We must release the mutex before going ahead and calling * uverbs_cleanup_ufile, as it might end up indirectly calling * uverbs_close, for example due to freeing the resources (e.g * mmput). */ mutex_unlock(&uverbs_dev->lists_mutex); uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); mutex_lock(&uverbs_dev->lists_mutex); } mutex_unlock(&uverbs_dev->lists_mutex); uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) { struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); if (device->ops.disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not * valid any more. * uverbs_device is still available until all clients close * their files, then the uverbs device ref count will be zero * and its resources will be freed. * Note: At this point no more files can be opened since the * cdev was deleted, however active clients can still issue * commands and close their open files. 
*/ ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } if (refcount_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); } static int __init ib_uverbs_init(void) { int ret; ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register device number\n"); goto out; } ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0, IB_UVERBS_NUM_DYNAMIC_MINOR, "infiniband_verbs"); if (ret) { pr_err("couldn't register dynamic device number\n"); goto out_alloc; } ret = class_register(&uverbs_class); if (ret) { pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { pr_err("user_verbs: couldn't register client\n"); goto out_class; } return 0; out_class: class_unregister(&uverbs_class); out_chrdev: unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); out_alloc: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); mmu_notifier_synchronize(); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup);
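Most of the ABI policing for the legacy write() path sits in verify_hdr() above: for a non-extended command, the header's in_words must account for every byte written and out_words must cover the method's response. Below is a minimal userspace sketch of that non-extended check, assuming a header layout that mirrors struct ib_uverbs_cmd_hdr; check_plain_cmd() and the req_size/resp_size values in main() are hypothetical.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <errno.h>

struct uverbs_cmd_hdr {            /* shadow of struct ib_uverbs_cmd_hdr */
	uint32_t command;
	uint16_t in_words;         /* total request length, in 4-byte words  */
	uint16_t out_words;        /* total response length, in 4-byte words */
};

/* Same ordering of checks as the non-extended branch of verify_hdr(). */
static int check_plain_cmd(const struct uverbs_cmd_hdr *hdr, size_t count,
			   size_t req_size, size_t resp_size)
{
	if ((size_t)hdr->in_words * 4 != count)
		return -EINVAL;    /* header must describe the whole write() */
	if (count < req_size + sizeof(*hdr))
		return -ENOSPC;    /* request too short for this method */
	if ((size_t)hdr->out_words * 4 < resp_size)
		return -ENOSPC;    /* caller's response buffer too small */
	return 0;
}

int main(void)
{
	/* hypothetical method: 40-byte request body, 24-byte response */
	struct uverbs_cmd_hdr hdr = { .command = 1, .in_words = 12, .out_words = 6 };

	printf("check: %d\n", check_plain_cmd(&hdr, 12 * 4, 40, 24));  /* prints 0 */
	return 0;
}

The extended-header path and the rdma-core DESTROY_CQ workaround seen above layer extra cases on top of this, but the basic rule stays the same: word counts times four must account for the bytes exchanged.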
// SPDX-License-Identifier: GPL-2.0-only /* Helper handling for netfilter. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/random.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_log.h> #include <net/ip.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hash); unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel.
*/ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) { return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (strcmp(h->name, name)) continue; if (h->tuple.src.l3num != NFPROTO_UNSPEC && h->tuple.src.l3num != l3num) continue; if (h->tuple.dst.protonum == protonum) return h; } } return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conntrack_helper * nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { rcu_read_unlock(); if (request_module("nfct-helper-%s", name) == 0) { rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); } else { return h; } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { module_put(h->me); h = NULL; } rcu_read_unlock(); return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); static struct nf_conntrack_nat_helper * nf_conntrack_nat_helper_find(const char *mod_name) { struct nf_conntrack_nat_helper *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { if (!strcmp(cur->mod_name, mod_name)) { found = true; break; } } return found ? cur : NULL; } int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct nf_conntrack_nat_helper *nat; char mod_name[NF_CT_HELPER_NAME_LEN]; int ret = 0; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); if (!h) { rcu_read_unlock(); return -ENOENT; } nat = nf_conntrack_nat_helper_find(h->nat_mod_name); if (!nat) { snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); rcu_read_unlock(); request_module("%s", mod_name); rcu_read_lock(); nat = nf_conntrack_nat_helper_find(mod_name); if (!nat) { rcu_read_unlock(); return -ENOENT; } } if (!try_module_get(nat->module)) ret = -ENOENT; rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); void nf_nat_helper_put(struct nf_conntrack_helper *helper) { struct nf_conntrack_nat_helper *nat; nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); if (WARN_ON_ONCE(!nat)) return; module_put(nat->module); } EXPORT_SYMBOL_GPL(nf_nat_helper_put); struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else pr_debug("failed to add helper extension area"); return help; } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; /* We already got a helper explicitly attached (e.g. 
nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; if (WARN_ON_ONCE(!tmpl)) return 0; help = nfct_help(tmpl); if (help != NULL) { helper = rcu_dereference(help->helper); set_bit(IPS_HELPER_BIT, &ct->status); } help = nfct_help(ct); if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, flags); if (help == NULL) return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); return 0; } } rcu_assign_pointer(help->helper, helper); return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ static int unhelp(struct nf_conn *ct, void *me) { struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } /* We are not intended to delete this conntrack. */ return 0; } void nf_ct_helper_destroy(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; if (help) { rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && helper->destroy) helper->destroy(ct); rcu_read_unlock(); } } static LIST_HEAD(nf_ct_helper_expectfn_list); void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_del_rcu(&n->head); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); __printf(3, 4) void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) 
{ const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; /* Called from the helper function, this call never fails */ help = nfct_help(ct); /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); struct nf_conntrack_helper *cur; int ret = 0, i; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); if (!nf_ct_helper_hash) return -ENOENT; if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; mutex_lock(&nf_ct_helper_mutex); for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) { if (!strcmp(cur->name, me->name) && (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { ret = -EEXIST; goto out; } } } /* avoid unpredictable behaviour for auto_assign_helper */ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) { hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } } } refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: mutex_unlock(&nf_ct_helper_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; if (exp->helper == me) return true; this = rcu_dereference_protected(help->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; mutex_unlock(&nf_ct_helper_mutex); /* Make sure every nothing is still using the helper unless its a * connection in the hash. 
*/ synchronize_rcu(); nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct), struct module *module) { helper->tuple.src.l3num = l3num; helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); else snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); } EXPORT_SYMBOL_GPL(nf_ct_helper_init); int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = nf_conntrack_helper_register(&helper[i]); if (err < 0) goto err; } return err; err: if (i > 0) nf_conntrack_helpers_unregister(helper, i); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, unsigned int n) { while (n-- > 0) nf_conntrack_helper_unregister(&helper[n]); } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_add_rcu(&nat->list, &nf_ct_nat_helpers); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_register); void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_del_rcu(&nat->list); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); int nf_conntrack_helper_init(void) { nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; } void nf_conntrack_helper_fini(void) { kvfree(nf_ct_helper_hash); nf_ct_helper_hash = NULL; }
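/*
 * Editor's illustrative sketch - not part of nf_conntrack_helper.c.  It shows
 * how a (hypothetical) conntrack helper module might use the
 * nf_ct_helper_init() / nf_conntrack_helpers_register() API defined above.
 * The "foo" name, port number, help-callback body and expect-policy values
 * are invented for illustration only.
 */
#include <linux/module.h>
#include <linux/in.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>

#define FOO_CTRL_PORT	12345		/* hypothetical control port */

static const struct nf_conntrack_expect_policy foo_exp_policy = {
	.max_expected	= 1,		/* at most one expected data flow */
	.timeout	= 5 * 60,	/* expectation timeout, in seconds */
};

/* Hypothetical per-packet callback: a real helper would parse the payload
 * here and register expectations for related connections. */
static int foo_help(struct sk_buff *skb, unsigned int protoff,
		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
	return NF_ACCEPT;
}

static struct nf_conntrack_helper foo_helper;

static int __init foo_helper_init(void)
{
	nf_ct_helper_init(&foo_helper, AF_INET, IPPROTO_TCP, "foo",
			  FOO_CTRL_PORT, FOO_CTRL_PORT, 0, &foo_exp_policy,
			  0, foo_help, NULL, THIS_MODULE);
	return nf_conntrack_helpers_register(&foo_helper, 1);
}

static void __exit foo_helper_exit(void)
{
	nf_conntrack_helpers_unregister(&foo_helper, 1);
}

module_init(foo_helper_init);
module_exit(foo_helper_exit);
MODULE_LICENSE("GPL");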
// SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/sysctl.h> #include <linux/export.h> #include <net/ip.h> #include <net/arp.h> /* * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. */ static HLIST_HEAD(ax25_uid_list); static DEFINE_RWLOCK(ax25_uid_lock); int ax25_uid_policy; EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; break; } } read_unlock(&ax25_uid_lock); return res; } EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; ax25_uid_assoc *user; unsigned long res; switch (cmd) { case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; } } read_unlock(&ax25_uid_lock); return res; case SIOCAX25ADDUID: { kuid_t sax25_kuid; if (!capable(CAP_NET_ADMIN)) return -EPERM; sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid); if (!uid_valid(sax25_kuid)) return -EINVAL; user = ax25_findbyuid(sax25_kuid); if (user) { ax25_uid_put(user); return -EEXIST; } if (sax->sax25_uid == 0) return -EINVAL; if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL) return -ENOMEM; refcount_set(&ax25_uid->refcount, 1); ax25_uid->uid = sax25_kuid; ax25_uid->call = sax->sax25_call; write_lock(&ax25_uid_lock); hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list); write_unlock(&ax25_uid_lock); return 0; } case SIOCAX25DELUID: if (!capable(CAP_NET_ADMIN)) return -EPERM; ax25_uid = NULL; write_lock(&ax25_uid_lock); ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } if (ax25_uid == NULL) { write_unlock(&ax25_uid_lock); return -ENOENT; } hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid);
write_unlock(&ax25_uid_lock); return 0; default: return -EINVAL; } return -EINVAL; /*NOTREACHED */ } #ifdef CONFIG_PROC_FS static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) __acquires(ax25_uid_lock) { read_lock(&ax25_uid_lock); return seq_hlist_start_head(&ax25_uid_list, *pos); } static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_hlist_next(v, &ax25_uid_list, pos); } static void ax25_uid_seq_stop(struct seq_file *seq, void *v) __releases(ax25_uid_lock) { read_unlock(&ax25_uid_lock); } static int ax25_uid_seq_show(struct seq_file *seq, void *v) { char buf[11]; if (v == SEQ_START_TOKEN) seq_printf(seq, "Policy: %d\n", ax25_uid_policy); else { struct ax25_uid_assoc *pt; pt = hlist_entry(v, struct ax25_uid_assoc, uid_node); seq_printf(seq, "%6d %s\n", from_kuid_munged(seq_user_ns(seq), pt->uid), ax2asc(buf, &pt->call)); } return 0; } const struct seq_operations ax25_uid_seqops = { .start = ax25_uid_seq_start, .next = ax25_uid_seq_next, .stop = ax25_uid_seq_stop, .show = ax25_uid_seq_show, }; #endif /* * Free all memory associated with UID/Callsign structures. */ void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; write_lock(&ax25_uid_lock); again: ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; } write_unlock(&ax25_uid_lock); }
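/*
 * Editor's illustrative sketch - not part of ax25_uid.c.  It shows the
 * reference handling a (hypothetical) in-kernel caller of ax25_findbyuid()
 * would need: the entry is returned with its refcount held and must be
 * released with ax25_uid_put().  The function name is invented.
 */
#include <linux/errno.h>
#include <net/ax25.h>

static int ax25_example_uid_to_call(kuid_t uid, ax25_address *call)
{
	ax25_uid_assoc *ax25_uid;

	ax25_uid = ax25_findbyuid(uid);	/* takes a reference on success */
	if (!ax25_uid)
		return -ENOENT;		/* no callsign mapped to this uid */

	*call = ax25_uid->call;		/* copy the callsign out */
	ax25_uid_put(ax25_uid);		/* drop the reference again */
	return 0;
}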
// SPDX-License-Identifier: GPL-2.0-only #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/xarray.h> /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @nextid: Pointer to an ID. * @max: The maximum ID to allocate (inclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @nextid and @max. * Note that @max is inclusive whereas the @end parameter to idr_alloc() * is exclusive. The new ID is assigned to @nextid before the pointer * is inserted into the IDR, so if @nextid points into the object pointed * to by @ptr, a concurrent lookup will not find an uninitialised ID. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers.
* * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. If an error occurred, * @nextid is unchanged. */ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned long max, gfp_t gfp) { struct radix_tree_iter iter; void __rcu **slot; unsigned int base = idr->idr_base; unsigned int id = *nextid; if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 0 : id - base; radix_tree_iter_init(&iter, id); slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); *nextid = iter.index + base; /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); return 0; } EXPORT_SYMBOL_GPL(idr_alloc_u32); /** * idr_alloc() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = start; int ret; if (WARN_ON_ONCE(start < 0)) return -EINVAL; ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); if (ret) return ret; return id; } EXPORT_SYMBOL_GPL(idr_alloc); /** * idr_alloc_cyclic() - Allocate an ID cyclically. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * The search for an unused ID will start at the last ID allocated and will * wrap around to @start if no free IDs are found before reaching @end. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = idr->idr_next; int err, max = end > 0 ? end - 1 : INT_MAX; if ((int)id < start) id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); if ((err == -ENOSPC) && (id > start)) { id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); } if (err) return err; idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); /** * idr_remove() - Remove an ID from the IDR. * @idr: IDR handle. * @id: Pointer ID. * * Removes this ID from the IDR. If the ID was not previously in the IDR, * this function returns %NULL. 
* * Since this function modifies the IDR, the caller should provide their * own locking to ensure that concurrent modification of the same IDR is * not possible. * * Return: The pointer formerly associated with this ID. */ void *idr_remove(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); } EXPORT_SYMBOL_GPL(idr_remove); /** * idr_find() - Return pointer for given ID. * @idr: IDR handle. * @id: Pointer ID. * * Looks up the pointer associated with this ID. A %NULL pointer may * indicate that @id is not allocated or that the %NULL pointer was * associated with this ID. * * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. * * Return: The pointer associated with this ID. */ void *idr_find(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); } EXPORT_SYMBOL_GPL(idr_find); /** * idr_for_each() - Iterate through all stored pointers. * @idr: IDR handle. * @fn: Function to be called for each pointer. * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. * * idr_for_each() can be called concurrently with idr_alloc() and * idr_remove() if protected by RCU. Newly added entries may not be * seen and deleted entries may be seen, but adding and removing entries * will not cause other entries to be skipped, nor spurious ones to be seen. */ int idr_for_each(const struct idr *idr, int (*fn)(int id, void *p, void *data), void *data) { struct radix_tree_iter iter; void __rcu **slot; int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; unsigned long id = iter.index + base; if (WARN_ON_ONCE(id > INT_MAX)) break; ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } return 0; } EXPORT_SYMBOL(idr_for_each); /** * idr_get_next_ul() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; void *entry = NULL; unsigned long base = idr->idr_base; unsigned long id = *nextid; id = (id < base) ? 0 : id - base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) { entry = rcu_dereference_raw(*slot); if (!entry) continue; if (!xa_is_internal(entry)) break; if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry)) break; slot = radix_tree_iter_retry(&iter); } if (!slot) return NULL; *nextid = iter.index + base; return entry; } EXPORT_SYMBOL(idr_get_next_ul); /** * idr_get_next() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. 
*/ void *idr_get_next(struct idr *idr, int *nextid) { unsigned long id = *nextid; void *entry = idr_get_next_ul(idr, &id); if (WARN_ON_ONCE(id > INT_MAX)) return NULL; *nextid = id; return entry; } EXPORT_SYMBOL(idr_get_next); /** * idr_replace() - replace pointer for given ID. * @idr: IDR handle. * @ptr: New pointer to associate with the ID. * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with * idr_alloc() and idr_remove() (as long as the ID being removed is not * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not * found. %-EINVAL indicates that @ptr was not valid. */ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; void *entry; id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description * * The IDA is an ID allocator which does not provide the ability to * associate an ID with a pointer. As such, it only needs to store one * bit per ID, and so is more space efficient than an IDR. To use an IDA, * define it using DEFINE_IDA() (or embed a &struct ida in a data structure, * then initialise it using ida_init()). To allocate a new ID, call * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range(). * To free an ID, call ida_free(). * * ida_destroy() can be used to dispose of an IDA without needing to * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * * The IDA handles its own locking. It is safe to call any of the IDA * functions without synchronisation in your code. * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ /* * Developer's notes: * * The IDA uses the functionality provided by the XArray to store bitmaps in * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap * have been set. * * I considered telling the XArray that each slot is an order-10 node * and indexing by bit number, but the XArray can't allow a single multi-index * entry in the head, which would significantly increase memory consumption * for the IDA. So instead we divide the index by the number of bits in the * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits * as a value entry. Value entries never have the XA_FREE_MARK cleared * because we can always convert them into a bitmap entry. * * It would be possible to optimise further; once we've run out of a * single 128-byte bitmap, we currently switch to a 576-byte node, put * the 128-byte bitmap in the first entry and then start allocating extra * 128-byte entries. We could instead use the 512 bytes of the node's * data as a bitmap before moving to that scheme. I do not believe this * is a worthwhile optimisation; Rasmus Villemoes surveyed the current * users of the IDA and almost none of them use more than 1024 entries. * Those that do use more than the 8192 IDs that the 512 bytes would * provide. * * The IDA always uses a lock to alloc/free. 
If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ /** * ida_alloc_range() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, gfp_t gfp) { XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); unsigned bit = min % IDA_BITMAP_BITS; unsigned long flags; struct ida_bitmap *bitmap, *alloc = NULL; if ((int)min < 0) return -ENOSPC; if ((int)max < 0) max = INT_MAX; retry: xas_lock_irqsave(&xas, flags); next: bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); if (xas.xa_index > min / IDA_BITMAP_BITS) bit = 0; if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (xa_is_value(bitmap)) { unsigned long tmp = xa_to_value(bitmap); if (bit < BITS_PER_XA_VALUE) { bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit < BITS_PER_XA_VALUE) { tmp |= 1UL << bit; xas_store(&xas, xa_mk_value(tmp)); goto out; } } bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; bitmap->bitmap[0] = tmp; xas_store(&xas, bitmap); if (xas_error(&xas)) { bitmap->bitmap[0] = 0; goto out; } } if (bitmap) { bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit == IDA_BITMAP_BITS) goto next; __set_bit(bit, bitmap->bitmap); if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) xas_clear_mark(&xas, XA_FREE_MARK); } else { if (bit < BITS_PER_XA_VALUE) { bitmap = xa_mk_value(1UL << bit); } else { bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; __set_bit(bit, bitmap->bitmap); } xas_store(&xas, bitmap); } out: xas_unlock_irqrestore(&xas, flags); if (xas_nomem(&xas, gfp)) { xas.xa_index = min / IDA_BITMAP_BITS; bit = min % IDA_BITMAP_BITS; goto retry; } if (bitmap != alloc) kfree(alloc); if (xas_error(&xas)) return xas_error(&xas); return xas.xa_index * IDA_BITMAP_BITS + bit; alloc: xas_unlock_irqrestore(&xas, flags); alloc = kzalloc(sizeof(*bitmap), gfp); if (!alloc) return -ENOMEM; xas_set(&xas, min / IDA_BITMAP_BITS); bit = min % IDA_BITMAP_BITS; goto retry; nospc: xas_unlock_irqrestore(&xas, flags); kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); /** * ida_free() - Release an allocated ID. * @ida: IDA handle. * @id: Previously allocated ID. * * Context: Any context. It is safe to call this function without * locking in your code. 
*/ void ida_free(struct ida *ida, unsigned int id) { XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; unsigned long flags; if ((int)id < 0) return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); if (xa_is_value(bitmap)) { unsigned long v = xa_to_value(bitmap); if (bit >= BITS_PER_XA_VALUE) goto err; if (!(v & (1UL << bit))) goto err; v &= ~(1UL << bit); if (!v) goto delete; xas_store(&xas, xa_mk_value(v)); } else { if (!bitmap || !test_bit(bit, bitmap->bitmap)) goto err; __clear_bit(bit, bitmap->bitmap); xas_set_mark(&xas, XA_FREE_MARK); if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { kfree(bitmap); delete: xas_store(&xas, NULL); } } xas_unlock_irqrestore(&xas, flags); return; err: xas_unlock_irqrestore(&xas, flags); WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_free); /** * ida_destroy() - Free all IDs. * @ida: IDA handle. * * Calling this function frees all IDs and releases all resources used * by an IDA. When this call returns, the IDA is empty and can be reused * or freed. If the IDA is already empty, there is no need to call this * function. * * Context: Any context. It is safe to call this function without * locking in your code. */ void ida_destroy(struct ida *ida) { XA_STATE(xas, &ida->xa, 0); struct ida_bitmap *bitmap; unsigned long flags; xas_lock_irqsave(&xas, flags); xas_for_each(&xas, bitmap, ULONG_MAX) { if (!xa_is_value(bitmap)) kfree(bitmap); xas_store(&xas, NULL); } xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(ida_destroy); #ifndef __KERNEL__ extern void xa_dump_index(unsigned long index, unsigned int shift); #define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) static void ida_dump_entry(void *entry, unsigned long index) { unsigned long i; if (!entry) return; if (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); unsigned int shift = node->shift + IDA_CHUNK_SHIFT + XA_CHUNK_SHIFT; xa_dump_index(index * IDA_BITMAP_BITS, shift); xa_dump_node(node); for (i = 0; i < XA_CHUNK_SIZE; i++) ida_dump_entry(node->slots[i], index | (i << node->shift)); } else if (xa_is_value(entry)) { xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); } else { struct ida_bitmap *bitmap = entry; xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); pr_cont("bitmap: %p data", bitmap); for (i = 0; i < IDA_BITMAP_LONGS; i++) pr_cont(" %lx", bitmap->bitmap[i]); pr_cont("\n"); } } static void ida_dump(struct ida *ida) { struct xarray *xa = &ida->xa; pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, xa->xa_flags >> ROOT_TAG_SHIFT); ida_dump_entry(xa->xa_head, 0); } #endif
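/*
 * Editor's illustrative sketch - not part of idr.c.  It shows minimal use of
 * the two allocators documented above: an IDR mapping IDs to pointers
 * (caller-provided locking) and an IDA handing out bare IDs (internally
 * locked).  All names here are invented.
 */
#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_idr_lock);
static DEFINE_IDA(example_ida);

/* Store @obj in the IDR; returns the new ID (>= 1) or a negative errno. */
static int example_idr_store(void *obj)
{
	int id;

	idr_preload(GFP_KERNEL);	/* preallocate nodes outside the lock */
	spin_lock(&example_idr_lock);
	id = idr_alloc(&example_idr, obj, 1, 0, GFP_NOWAIT);
	spin_unlock(&example_idr_lock);
	idr_preload_end();

	return id;
}

/* Remove @id and return the pointer that was stored under it (or NULL). */
static void *example_idr_release(int id)
{
	void *obj;

	spin_lock(&example_idr_lock);
	obj = idr_remove(&example_idr, id);
	spin_unlock(&example_idr_lock);

	return obj;
}

/* IDA: allocate and free a small integer; no pointer is associated. */
static int example_ida_get(void)
{
	return ida_alloc_range(&example_ida, 0, 127, GFP_KERNEL);
}

static void example_ida_put(int slot)
{
	ida_free(&example_ida, slot);
}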
// SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 */

#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>

static const unsigned int nf_ct_generic_timeout = 600*HZ;

#ifdef CONFIG_NF_CONNTRACK_TIMEOUT

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>

static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
					 struct net *net, void *data)
{
	struct nf_generic_net *gn = nf_generic_pernet(net);
	unsigned int *timeout = data;

	if (!timeout)
		timeout = &gn->timeout;

	if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
		*timeout =
		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ;
	else {
		/* Set default generic timeout. */
		*timeout = gn->timeout;
	}

	return 0;
}

static int
generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
{
	const unsigned int *timeout = data;

	if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -ENOSPC;
}

static const struct nla_policy
generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
	[CTA_TIMEOUT_GENERIC_TIMEOUT]	= { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */

void nf_conntrack_generic_init_net(struct net *net)
{
	struct nf_generic_net *gn = nf_generic_pernet(net);

	gn->timeout = nf_ct_generic_timeout;
}

const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
	.l4proto		= 255,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	.ctnl_timeout		= {
		.nlattr_to_obj	= generic_timeout_nlattr_to_obj,
		.obj_to_nlattr	= generic_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_GENERIC_MAX,
		.obj_size	= sizeof(unsigned int),
		.nla_policy	= generic_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
};
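/*
 * Editor's illustrative sketch - not part of nf_conntrack_proto_generic.c.
 * The per-net default above is kept in jiffies (600*HZ, i.e. ten minutes),
 * while the netlink attribute carries seconds; the two handlers convert with
 * "* HZ" and "/ HZ".  The helper below merely restates that conversion for a
 * (hypothetical) reader of the per-net value; the name is invented.
 */
static inline unsigned int example_generic_timeout_secs(struct net *net)
{
	/* nf_generic_pernet() is the same per-net accessor used above */
	return nf_generic_pernet(net)->timeout / HZ;
}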
// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * IB infrastructure: * Establish SMC-R as an Infiniband Client to be notified about added and * removed IB devices of type RDMA. * Determine device and port characteristics for these IB devices. * * Copyright IBM Corp. 2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include <linux/random.h> #include <linux/workqueue.h> #include <linux/scatterlist.h> #include <linux/wait.h> #include <linux/mutex.h> #include <linux/inetdevice.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> #include "smc_pnet.h" #include "smc_ib.h" #include "smc_core.h" #include "smc_wr.h" #include "smc.h" #include "smc_netlink.h" #define SMC_MAX_CQE 32766 /* max. # of completion queue elements */ #define SMC_QP_MIN_RNR_TIMER 5 #define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */ #define SMC_QP_RETRY_CNT 7 /* 7: infinite */ #define SMC_QP_RNR_RETRY 7 /* 7: infinite */ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), .list = LIST_HEAD_INIT(smc_ib_devices.list), }; u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ static int smc_ib_modify_qp_init(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_INIT; qp_attr.pkey_index = 0; qp_attr.port_num = lnk->ibport; qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_ACCESS_FLAGS | IB_QP_PORT); } static int smc_ib_modify_qp_rtr(struct smc_link *lnk) { enum ib_qp_attr_mask qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; struct ib_qp_attr qp_attr; u8 hop_lim = 1; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTR; qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) hop_lim = IPV6_DEFAULT_HOPLIMIT; rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac, sizeof(lnk->lgr->nexthop_mac)); else memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, sizeof(lnk->peer_mac)); qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming * requests */ qp_attr.min_rnr_timer = SMC_QP_MIN_RNR_TIMER; return ib_modify_qp(lnk->roce_qp, &qp_attr, qp_attr_mask); } int smc_ib_modify_qp_rts(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTS; qp_attr.timeout =
SMC_QP_TIMEOUT; /* local ack timeout */ qp_attr.retry_cnt = SMC_QP_RETRY_CNT; /* retry count */ qp_attr.rnr_retry = SMC_QP_RNR_RETRY; /* RNR retries, 7=infinite */ qp_attr.sq_psn = lnk->psn_initial; /* starting send packet seq # */ qp_attr.max_rd_atomic = 1; /* # of outstanding RDMA reads and * atomic ops allowed */ return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_SQ_PSN | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC); } int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } int smc_ib_ready_link(struct smc_link *lnk) { struct smc_link_group *lgr = smc_get_lgr(lnk); int rc = 0; rc = smc_ib_modify_qp_init(lnk); if (rc) goto out; rc = smc_ib_modify_qp_rtr(lnk); if (rc) goto out; smc_wr_remember_qp_attr(lnk); rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv, IB_CQ_SOLICITED_MASK); if (rc) goto out; rc = smc_wr_rx_post_init(lnk); if (rc) goto out; smc_wr_remember_qp_attr(lnk); if (lgr->role == SMC_SERV) { rc = smc_ib_modify_qp_rts(lnk); if (rc) goto out; smc_wr_remember_qp_attr(lnk); } out: return rc; } static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { const struct ib_gid_attr *attr; int rc; attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); if (IS_ERR(attr)) return -ENODEV; rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]); rdma_put_gid_attr(attr); return rc; } /* Create an identifier unique for this instance of SMC-R. * The MAC-address of the first active registered IB device * plus a random 2-byte number is used to create this identifier. * This name is delivered to the peer during connection initialization. */ static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, u8 ibport) { memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], sizeof(smcibdev->mac[ibport - 1])); } bool smc_ib_is_valid_local_systemid(void) { return !is_zero_ether_addr(&local_systemid[2]); } static void smc_ib_init_local_systemid(void) { get_random_bytes(&local_systemid[0], 2); } bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) { return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway) { struct neighbour *neigh = NULL; struct rtable *rt = NULL; struct flowi4 fl4 = { .saddr = saddr, .daddr = daddr }; if (daddr == cpu_to_be32(INADDR_NONE)) goto out; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) goto out_rt; neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr); if (!neigh) goto out_rt; memcpy(nexthop_mac, neigh->ha, ETH_ALEN); *uses_gateway = rt->rt_uses_gateway; neigh_release(neigh); ip_rt_put(rt); return 0; out_rt: ip_rt_put(rt); out: return -ENOENT; } static int smc_ib_determine_gid_rcu(const struct net_device *ndev, const struct ib_gid_attr *attr, u8 gid[], u8 *sgid_index, struct smc_init_info_smcrv2 *smcrv2) { if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) { if (gid) memcpy(gid, &attr->gid, SMC_GID_SIZE); if (sgid_index) *sgid_index = attr->index; return 0; } if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { struct in_device *in_dev = __in_dev_get_rcu(ndev); struct net *net = dev_net(ndev); const struct in_ifaddr *ifa; bool subnet_match = false; if (!in_dev) goto out; 
in_dev_for_each_ifa_rcu(ifa, in_dev) { if (!inet_ifa_match(smcrv2->saddr, ifa)) continue; subnet_match = true; break; } if (!subnet_match) goto out; if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr, smcrv2->daddr, smcrv2->nexthop_mac, &smcrv2->uses_gateway)) goto out; if (gid) memcpy(gid, &attr->gid, SMC_GID_SIZE); if (sgid_index) *sgid_index = attr->index; return 0; } out: return -ENODEV; } /* determine the gid for an ib-device port and vlan id */ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index, struct smc_init_info_smcrv2 *smcrv2) { const struct ib_gid_attr *attr; const struct net_device *ndev; int i; for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); if (IS_ERR(attr)) continue; rcu_read_lock(); ndev = rdma_read_gid_attr_ndev_rcu(attr); if (!IS_ERR(ndev) && ((!vlan_id && !is_vlan_dev(ndev)) || (vlan_id && is_vlan_dev(ndev) && vlan_dev_vlan_id(ndev) == vlan_id))) { if (!smc_ib_determine_gid_rcu(ndev, attr, gid, sgid_index, smcrv2)) { rcu_read_unlock(); rdma_put_gid_attr(attr); return 0; } } rcu_read_unlock(); rdma_put_gid_attr(attr); } return -ENODEV; } /* check if gid is still defined on smcibdev */ static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2, struct smc_ib_device *smcibdev, u8 ibport) { const struct ib_gid_attr *attr; bool rc = false; int i; for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); if (IS_ERR(attr)) continue; rcu_read_lock(); if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) || (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && !(ipv6_addr_type((const struct in6_addr *)&attr->gid) & IPV6_ADDR_LINKLOCAL))) if (!memcmp(gid, &attr->gid, SMC_GID_SIZE)) rc = true; rcu_read_unlock(); rdma_put_gid_attr(attr); } return rc; } /* check all links if the gid is still defined on smcibdev */ static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport) { struct smc_link_group *lgr; int i; spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry(lgr, &smc_lgr_list.list, list) { if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, SMC_MAX_PNETID_LEN)) continue; /* lgr is not affected */ if (list_empty(&lgr->list)) continue; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].state == SMC_LNK_UNUSED || lgr->lnk[i].smcibdev != smcibdev) continue; if (!smc_ib_check_link_gid(lgr->lnk[i].gid, lgr->smc_version == SMC_V2, smcibdev, ibport)) smcr_port_err(smcibdev, ibport); } } spin_unlock_bh(&smc_lgr_list.lock); } static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) { int rc; memset(&smcibdev->pattr[ibport - 1], 0, sizeof(smcibdev->pattr[ibport - 1])); rc = ib_query_port(smcibdev->ibdev, ibport, &smcibdev->pattr[ibport - 1]); if (rc) goto out; /* the SMC protocol requires specification of the RoCE MAC address */ rc = smc_ib_fill_mac(smcibdev, ibport); if (rc) goto out; if (!smc_ib_is_valid_local_systemid() && smc_ib_port_active(smcibdev, ibport)) /* create unique system identifier */ smc_ib_define_local_systemid(smcibdev, ibport); out: return rc; } /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { struct smc_ib_device *smcibdev = container_of( work, struct smc_ib_device, port_event_work); u8 port_idx; for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); 
clear_bit(port_idx, &smcibdev->port_event_mask); if (!smc_ib_port_active(smcibdev, port_idx + 1)) { set_bit(port_idx, smcibdev->ports_going_away); smcr_port_err(smcibdev, port_idx + 1); } else { clear_bit(port_idx, smcibdev->ports_going_away); smcr_port_add(smcibdev, port_idx + 1); smc_ib_gid_check(smcibdev, port_idx + 1); } } } /* can be called in IRQ context */ static void smc_ib_global_event_handler(struct ib_event_handler *handler, struct ib_event *ibevent) { struct smc_ib_device *smcibdev; bool schedule = false; u8 port_idx; smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: /* terminate all ports on device */ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { set_bit(port_idx, &smcibdev->port_event_mask); if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) schedule = true; } if (schedule) schedule_work(&smcibdev->port_event_work); break; case IB_EVENT_PORT_ACTIVE: port_idx = ibevent->element.port_num - 1; if (port_idx >= SMC_MAX_PORTS) break; set_bit(port_idx, &smcibdev->port_event_mask); if (test_and_clear_bit(port_idx, smcibdev->ports_going_away)) schedule_work(&smcibdev->port_event_work); break; case IB_EVENT_PORT_ERR: port_idx = ibevent->element.port_num - 1; if (port_idx >= SMC_MAX_PORTS) break; set_bit(port_idx, &smcibdev->port_event_mask); if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) schedule_work(&smcibdev->port_event_work); break; case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; if (port_idx >= SMC_MAX_PORTS) break; set_bit(port_idx, &smcibdev->port_event_mask); schedule_work(&smcibdev->port_event_work); break; default: break; } } void smc_ib_dealloc_protection_domain(struct smc_link *lnk) { if (lnk->roce_pd) ib_dealloc_pd(lnk->roce_pd); lnk->roce_pd = NULL; } int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; return rc; } static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr, struct smc_ib_device *smcibdev) { struct smc_link_group *lgr; bool rc = false; int i; spin_lock_bh(&smc_lgr->lock); list_for_each_entry(lgr, &smc_lgr->list, list) { if (lgr->is_smcd) continue; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].state == SMC_LNK_UNUSED || lgr->lnk[i].smcibdev != smcibdev) continue; if (lgr->type == SMC_LGR_SINGLE || lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) { rc = true; goto out; } } } out: spin_unlock_bh(&smc_lgr->lock); return rc; } static int smc_nl_handle_dev_port(struct sk_buff *skb, struct ib_device *ibdev, struct smc_ib_device *smcibdev, int port) { char smc_pnet[SMC_MAX_PNETID_LEN + 1]; struct nlattr *port_attrs; unsigned char port_state; int lnk_count = 0; port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT + port); if (!port_attrs) goto errout; if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcibdev->pnetid_by_user[port])) goto errattr; memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN); smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errattr; if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV, smcibdev->ndev_ifidx[port])) goto errattr; if (nla_put_u8(skb, SMC_NLA_DEV_PORT_VALID, 1)) goto errattr; port_state = smc_ib_port_active(smcibdev, port + 1); if (nla_put_u8(skb, SMC_NLA_DEV_PORT_STATE, port_state)) goto errattr; lnk_count = atomic_read(&smcibdev->lnk_cnt_by_port[port]); if (nla_put_u32(skb, 
SMC_NLA_DEV_PORT_LNK_CNT, lnk_count)) goto errattr; nla_nest_end(skb, port_attrs); return 0; errattr: nla_nest_cancel(skb, port_attrs); errout: return -EMSGSIZE; } static bool smc_nl_handle_pci_values(const struct smc_pci_dev *smc_pci_dev, struct sk_buff *skb) { if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev->pci_fid)) return false; if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev->pci_pchid)) return false; if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev->pci_vendor)) return false; if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev->pci_device)) return false; if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev->pci_id)) return false; return true; } static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, struct sk_buff *skb, struct netlink_callback *cb) { char smc_ibname[IB_DEVICE_NAME_MAX]; struct smc_pci_dev smc_pci_dev; struct pci_dev *pci_dev; unsigned char is_crit; struct nlattr *attrs; void *nlh; int i; nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &smc_gen_nl_family, NLM_F_MULTI, SMC_NETLINK_GET_DEV_SMCR); if (!nlh) goto errmsg; attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCR); if (!attrs) goto errout; is_crit = smcr_diag_is_dev_critical(&smc_lgr_list, smcibdev); if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, is_crit)) goto errattr; if (smcibdev->ibdev->dev.parent) { memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent); smc_set_pci_values(pci_dev, &smc_pci_dev); if (!smc_nl_handle_pci_values(&smc_pci_dev, skb)) goto errattr; } snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name); if (nla_put_string(skb, SMC_NLA_DEV_IB_NAME, smc_ibname)) goto errattr; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(smcibdev->ibdev, i)) continue; if (smc_nl_handle_dev_port(skb, smcibdev->ibdev, smcibdev, i - 1)) goto errattr; } nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); return 0; errattr: nla_nest_cancel(skb, attrs); errout: genlmsg_cancel(skb, nlh); errmsg: return -EMSGSIZE; } static void smc_nl_prep_smcr_dev(struct smc_ib_devices *dev_list, struct sk_buff *skb, struct netlink_callback *cb) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); struct smc_ib_device *smcibdev; int snum = cb_ctx->pos[0]; int num = 0; mutex_lock(&dev_list->mutex); list_for_each_entry(smcibdev, &dev_list->list, list) { if (num < snum) goto next; if (smc_nl_handle_smcr_dev(smcibdev, skb, cb)) goto errout; next: num++; } errout: mutex_unlock(&dev_list->mutex); cb_ctx->pos[0] = num; } int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb) { smc_nl_prep_smcr_dev(&smc_ib_devices, skb, cb); return skb->len; } static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { struct smc_link *lnk = (struct smc_link *)priv; struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: port_idx = ibevent->element.qp->port - 1; if (port_idx >= SMC_MAX_PORTS) break; set_bit(port_idx, &smcibdev->port_event_mask); if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) schedule_work(&smcibdev->port_event_work); break; default: break; } } void smc_ib_destroy_queue_pair(struct smc_link *lnk) { if (lnk->roce_qp) ib_destroy_qp(lnk->roce_qp); lnk->roce_qp = NULL; } /* create a queue pair within the protection domain for a link */ int smc_ib_create_queue_pair(struct smc_link *lnk) { struct ib_qp_init_attr qp_attr = { .event_handler = smc_ib_qp_event_handler, .qp_context = lnk, .send_cq = 
lnk->smcibdev->roce_cq_send, .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { /* include unsolicited rdma_writes as well, * there are max. 2 RDMA_WRITE per 1 WR_SEND */ .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = lnk->wr_rx_sge_cnt, .max_inline_data = 0, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, }; int rc; lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr); rc = PTR_ERR_OR_ZERO(lnk->roce_qp); if (IS_ERR(lnk->roce_qp)) lnk->roce_qp = NULL; else smc_wr_remember_qp_attr(lnk); return rc; } void smc_ib_put_memory_region(struct ib_mr *mr) { ib_dereg_mr(mr); } static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) { unsigned int offset = 0; int sg_num; /* map the largest prefix of a dma mapped SG list */ sg_num = ib_map_mr_sg(buf_slot->mr[link_idx], buf_slot->sgt[link_idx].sgl, buf_slot->sgt[link_idx].orig_nents, &offset, PAGE_SIZE); return sg_num; } /* Allocate a memory region and map the dma mapped SG list of buf_slot */ int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx) { if (buf_slot->mr[link_idx]) return 0; /* already done */ buf_slot->mr[link_idx] = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); if (IS_ERR(buf_slot->mr[link_idx])) { int rc; rc = PTR_ERR(buf_slot->mr[link_idx]); buf_slot->mr[link_idx] = NULL; return rc; } if (smc_ib_map_mr_sg(buf_slot, link_idx) != buf_slot->sgt[link_idx].orig_nents) return -EINVAL; return 0; } bool smc_ib_is_sg_need_sync(struct smc_link *lnk, struct smc_buf_desc *buf_slot) { struct scatterlist *sg; unsigned int i; bool ret = false; /* for now there is just one DMA address */ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, buf_slot->sgt[lnk->link_idx].nents, i) { if (!sg_dma_len(sg)) break; if (dma_need_sync(lnk->smcibdev->ibdev->dma_device, sg_dma_address(sg))) { ret = true; goto out; } } out: return ret; } /* synchronize buffer usage for cpu access */ void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { struct scatterlist *sg; unsigned int i; if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx))) return; /* for now there is just one DMA address */ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, buf_slot->sgt[lnk->link_idx].nents, i) { if (!sg_dma_len(sg)) break; ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev, sg_dma_address(sg), sg_dma_len(sg), data_direction); } } /* synchronize buffer usage for device access */ void smc_ib_sync_sg_for_device(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { struct scatterlist *sg; unsigned int i; if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx))) return; /* for now there is just one DMA address */ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, buf_slot->sgt[lnk->link_idx].nents, i) { if (!sg_dma_len(sg)) break; ib_dma_sync_single_for_device(lnk->smcibdev->ibdev, sg_dma_address(sg), sg_dma_len(sg), data_direction); } } /* Map a new TX or RX buffer SG-table to DMA */ int smc_ib_buf_map_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { int mapped_nents; mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev, buf_slot->sgt[lnk->link_idx].sgl, buf_slot->sgt[lnk->link_idx].orig_nents, data_direction); if (!mapped_nents) return -ENOMEM; return mapped_nents; } void smc_ib_buf_unmap_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction 
data_direction) { if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address) return; /* already unmapped */ ib_dma_unmap_sg(lnk->smcibdev->ibdev, buf_slot->sgt[lnk->link_idx].sgl, buf_slot->sgt[lnk->link_idx].orig_nents, data_direction); buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0; } long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { .cqe = SMC_MAX_CQE, .comp_vector = 0 }; int cqe_size_order, smc_order; long rc; mutex_lock(&smcibdev->mutex); rc = 0; if (smcibdev->initialized) goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; smc_order = MAX_PAGE_ORDER - cqe_size_order; if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, smc_wr_tx_cq_handler, NULL, smcibdev, &cqattr); rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); if (IS_ERR(smcibdev->roce_cq_send)) { smcibdev->roce_cq_send = NULL; goto out; } smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, smc_wr_rx_cq_handler, NULL, smcibdev, &cqattr); rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_recv); if (IS_ERR(smcibdev->roce_cq_recv)) { smcibdev->roce_cq_recv = NULL; goto err; } smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; goto out; err: ib_destroy_cq(smcibdev->roce_cq_send); out: mutex_unlock(&smcibdev->mutex); return rc; } static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { mutex_lock(&smcibdev->mutex); if (!smcibdev->initialized) goto out; smcibdev->initialized = 0; ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); smc_wr_remove_dev(smcibdev); out: mutex_unlock(&smcibdev->mutex); } static struct ib_client smc_ib_client; static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) { struct ib_device *ibdev = smcibdev->ibdev; struct net_device *ndev; ndev = ib_device_get_netdev(ibdev, port + 1); if (ndev) { smcibdev->ndev_ifidx[port] = ndev->ifindex; dev_put(ndev); } } void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) { struct smc_ib_device *smcibdev; struct ib_device *libdev; struct net_device *lndev; u8 port_cnt; int i; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { port_cnt = smcibdev->ibdev->phys_port_cnt; for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { libdev = smcibdev->ibdev; lndev = ib_device_get_netdev(libdev, i + 1); dev_put(lndev); if (lndev != ndev) continue; if (event == NETDEV_REGISTER) smcibdev->ndev_ifidx[i] = ndev->ifindex; if (event == NETDEV_UNREGISTER) smcibdev->ndev_ifidx[i] = 0; } } mutex_unlock(&smc_ib_devices.mutex); } /* callback function for ib_register_client() */ static int smc_ib_add_dev(struct ib_device *ibdev) { struct smc_ib_device *smcibdev; u8 port_cnt; int i; if (ibdev->node_type != RDMA_NODE_IB_CA) return -EOPNOTSUPP; smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL); if (!smcibdev) return -ENOMEM; smcibdev->ibdev = ibdev; INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); mutex_init(&smcibdev->mutex); mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); mutex_unlock(&smc_ib_devices.mutex); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, smc_ib_global_event_handler); ib_register_event_handler(&smcibdev->event_handler); /* 
trigger reading of the port attributes */ port_cnt = smcibdev->ibdev->phys_port_cnt; pr_warn_ratelimited("smc: adding ib device %s with port count %d\n", smcibdev->ibdev->name, port_cnt); for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { set_bit(i, &smcibdev->port_event_mask); /* determine pnetids of the port */ if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, smcibdev->pnetid[i])) smc_pnetid_by_table_ib(smcibdev, i + 1); smc_copy_netdev_ifindex(smcibdev, i); pr_warn_ratelimited("smc: ib device %s port %d has pnetid " "%.16s%s\n", smcibdev->ibdev->name, i + 1, smcibdev->pnetid[i], smcibdev->pnetid_by_user[i] ? " (user defined)" : ""); } schedule_work(&smcibdev->port_event_work); return 0; } /* callback function for ib_unregister_client() */ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; mutex_lock(&smc_ib_devices.mutex); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ mutex_unlock(&smc_ib_devices.mutex); pr_warn_ratelimited("smc: removing ib device %s\n", smcibdev->ibdev->name); smc_smcr_terminate_all(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); cancel_work_sync(&smcibdev->port_event_work); kfree(smcibdev); } static struct ib_client smc_ib_client = { .name = "smc_ib", .add = smc_ib_add_dev, .remove = smc_ib_remove_dev, }; int __init smc_ib_register_client(void) { smc_ib_init_local_systemid(); return ib_register_client(&smc_ib_client); } void smc_ib_unregister_client(void) { ib_unregister_client(&smc_ib_client); }
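The completion-queue sizing in smc_ib_setup_per_ibdev() above clamps the requested number of CQEs so that the calculated value fits the mlx5 CQ allocation. The standalone sketch below only reproduces that arithmetic; SMC_MAX_CQE, PAGE_SIZE, MAX_PAGE_ORDER and the cache line size are stand-in example values, not taken from this code.

#include <stdio.h>

int main(void)
{
	/* assumed example values; the kernel uses its own constants */
	const unsigned int smc_max_cqe = 32768;	/* stand-in for SMC_MAX_CQE */
	const unsigned int page_size = 4096;	/* stand-in for PAGE_SIZE */
	const unsigned int max_page_order = 10;	/* stand-in for MAX_PAGE_ORDER */
	const unsigned int cache_line = 64;	/* stand-in for cache_line_size() */

	/* same steps as smc_ib_setup_per_ibdev() */
	unsigned int cqe_size_order = (cache_line == 128) ? 7 : 6;
	unsigned int smc_order = max_page_order - cqe_size_order;
	unsigned int cqe = smc_max_cqe;

	if (smc_max_cqe + 2 > (1U << smc_order) * page_size)
		cqe = (1U << smc_order) * page_size - 2;

	printf("requested CQ depth: %u CQEs\n", cqe);
	return 0;
}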
// SPDX-License-Identifier: GPL-2.0 /* * usb port device code * * Copyright (C) 2012 Intel Corp * * Author: Lan Tianyu <tianyu.lan@intel.com> */ #include <linux/kstrtox.h> #include <linux/slab.h>
#include <linux/string_choices.h> #include <linux/sysfs.h> #include <linux/pm_qos.h> #include <linux/component.h> #include <linux/usb/of.h> #include "hub.h" static int usb_port_block_power_off; static const struct attribute_group *port_dev_group[]; static ssize_t early_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "%s\n", str_yes_no(port_dev->early_stop)); } static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); bool value; if (kstrtobool(buf, &value)) return -EINVAL; if (value) port_dev->early_stop = 1; else port_dev->early_stop = 0; return count; } static DEVICE_ATTR_RW(early_stop); static ssize_t disable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; u16 portstatus, unused; bool disabled; int rc; struct kernfs_node *kn; if (!hub) return -ENODEV; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) goto out_hub_get; /* * Prevent deadlock if another process is concurrently * trying to unregister hdev. */ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); if (!kn) { rc = -ENODEV; goto out_autopm; } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); out_hdev_lock: usb_unlock_device(hdev); sysfs_unbreak_active_protection(kn); out_autopm: usb_autopm_put_interface(intf); out_hub_get: hub_put(hub); if (rc) return rc; return sysfs_emit(buf, "%s\n", disabled ? "1" : "0"); } static ssize_t disable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; bool disabled; int rc; struct kernfs_node *kn; if (!hub) return -ENODEV; rc = kstrtobool(buf, &disabled); if (rc) return rc; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) goto out_hub_get; /* * Prevent deadlock if another process is concurrently * trying to unregister hdev. 
*/ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); if (!kn) { rc = -ENODEV; goto out_autopm; } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } if (disabled && port_dev->child) usb_disconnect(&port_dev->child); rc = usb_hub_set_port_power(hdev, hub, port1, !disabled); if (disabled) { usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); } if (!rc) rc = count; out_hdev_lock: usb_unlock_device(hdev); sysfs_unbreak_active_protection(kn); out_autopm: usb_autopm_put_interface(intf); out_hub_get: hub_put(hub); return rc; } static DEVICE_ATTR_RW(disable); static ssize_t location_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "0x%08x\n", port_dev->location); } static DEVICE_ATTR_RO(location); static ssize_t connect_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); char *result; switch (port_dev->connect_type) { case USB_PORT_CONNECT_TYPE_HOT_PLUG: result = "hotplug"; break; case USB_PORT_CONNECT_TYPE_HARD_WIRED: result = "hardwired"; break; case USB_PORT_NOT_USED: result = "not used"; break; default: result = "unknown"; break; } return sysfs_emit(buf, "%s\n", result); } static DEVICE_ATTR_RO(connect_type); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); enum usb_device_state state = READ_ONCE(port_dev->state); return sysfs_emit(buf, "%s\n", usb_state_string(state)); } static DEVICE_ATTR_RO(state); static ssize_t over_current_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "%u\n", port_dev->over_current_count); } static DEVICE_ATTR_RO(over_current_count); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "%08x\n", port_dev->quirks); } static ssize_t quirks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); u32 value; if (kstrtou32(buf, 16, &value)) return -EINVAL; port_dev->quirks = value; return count; } static DEVICE_ATTR_RW(quirks); static ssize_t usb3_lpm_permit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); const char *p; if (port_dev->usb3_lpm_u1_permit) { if (port_dev->usb3_lpm_u2_permit) p = "u1_u2"; else p = "u1"; } else { if (port_dev->usb3_lpm_u2_permit) p = "u2"; else p = "0"; } return sysfs_emit(buf, "%s\n", p); } static ssize_t usb3_lpm_permit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *udev = port_dev->child; struct usb_hcd *hcd; if (!strncmp(buf, "u1_u2", 5)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "u1", 2)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 0; } else if (!strncmp(buf, "u2", 2)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "0", 1)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 0; } else return -EINVAL; /* If device is connected to the port, disable or enable lpm * to make 
new u1 u2 setting take effect immediately. */ if (udev) { hcd = bus_to_hcd(udev->bus); if (!hcd) return -EINVAL; usb_lock_device(udev); mutex_lock(hcd->bandwidth_mutex); if (!usb_disable_lpm(udev)) usb_enable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); usb_unlock_device(udev); } return count; } static DEVICE_ATTR_RW(usb3_lpm_permit); static struct attribute *port_dev_attrs[] = { &dev_attr_connect_type.attr, &dev_attr_state.attr, &dev_attr_location.attr, &dev_attr_quirks.attr, &dev_attr_over_current_count.attr, &dev_attr_disable.attr, &dev_attr_early_stop.attr, NULL, }; static const struct attribute_group port_dev_attr_grp = { .attrs = port_dev_attrs, }; static const struct attribute_group *port_dev_group[] = { &port_dev_attr_grp, NULL, }; static struct attribute *port_dev_usb3_attrs[] = { &dev_attr_usb3_lpm_permit.attr, NULL, }; static const struct attribute_group port_dev_usb3_attr_grp = { .attrs = port_dev_usb3_attrs, }; static const struct attribute_group *port_dev_usb3_group[] = { &port_dev_attr_grp, &port_dev_usb3_attr_grp, NULL, }; static void usb_port_device_release(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); kfree(port_dev->req); kfree(port_dev); } #ifdef CONFIG_PM static int usb_port_runtime_resume(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_device *udev = port_dev->child; struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) { set_bit(port1, hub->power_bits); return 0; } /* * Power on our usb3 peer before this usb2 port to prevent a usb3 * device from degrading to its usb2 connection */ if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { /* * Our preference is to simply wait for the port to reconnect, * as that is the lowest latency method to restart the port. * However, there are cases where toggling port power results in * the host port and the device port getting out of sync causing * a link training live lock. Upon timeout, flag the port as * needing warm reset recovery (to be performed later by * usb_port_resume() as requested via usb_wakeup_notification()) */ if (hub_port_debounce_be_connected(hub, port1) < 0) { dev_dbg(&port_dev->dev, "reconnect timeout\n"); if (hub_is_superspeed(hdev)) set_bit(port1, hub->warm_reset_bits); } /* Force the child awake to revalidate after the power loss. 
*/ if (!test_and_set_bit(port1, hub->child_usage_bits)) { pm_runtime_get_noresume(&port_dev->dev); pm_request_resume(&udev->dev); } } usb_autopm_put_interface(intf); return retval; } static int usb_port_runtime_suspend(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) return -EBUSY; if (dev_pm_qos_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF) == PM_QOS_FLAGS_ALL) return -EAGAIN; if (usb_port_block_power_off) return -EBUSY; retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); usb_autopm_put_interface(intf); /* * Our peer usb3 port may now be able to suspend, so * asynchronously queue a suspend request to observe that this * usb2 port is now off. */ if (!port_dev->is_superspeed && peer) pm_runtime_put(&peer->dev); return retval; } #endif static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *udev = port_dev->child; if (udev && !udev->port_is_suspended) { usb_disable_usb2_hardware_lpm(udev); usb_unlocked_disable_lpm(udev); } } static const struct dev_pm_ops usb_port_pm_ops = { #ifdef CONFIG_PM .runtime_suspend = usb_port_runtime_suspend, .runtime_resume = usb_port_runtime_resume, #endif }; const struct device_type usb_port_device_type = { .name = "usb_port", .release = usb_port_device_release, .pm = &usb_port_pm_ops, }; static struct device_driver usb_port_driver = { .name = "usb", .owner = THIS_MODULE, .shutdown = usb_port_shutdown, }; static int link_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; int rc; if (left->peer == right && right->peer == left) return 0; if (left->peer || right->peer) { struct usb_port *lpeer = left->peer; struct usb_port *rpeer = right->peer; char *method; if (left->location && left->location == right->location) method = "location"; else method = "default"; pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n", dev_name(&left->dev), dev_name(&right->dev), method, dev_name(&left->dev), lpeer ? dev_name(&lpeer->dev) : "none", dev_name(&right->dev), rpeer ? dev_name(&rpeer->dev) : "none"); return -EBUSY; } rc = sysfs_create_link(&left->dev.kobj, &right->dev.kobj, "peer"); if (rc) return rc; rc = sysfs_create_link(&right->dev.kobj, &left->dev.kobj, "peer"); if (rc) { sysfs_remove_link(&left->dev.kobj, "peer"); return rc; } /* * We need to wake the HiSpeed port to make sure we don't race * setting ->peer with usb_port_runtime_suspend(). Otherwise we * may miss a suspend event for the SuperSpeed port. */ if (left->is_superspeed) { ss_port = left; WARN_ON(right->is_superspeed); hs_port = right; } else { ss_port = right; WARN_ON(!right->is_superspeed); hs_port = left; } pm_runtime_get_sync(&hs_port->dev); left->peer = right; right->peer = left; /* * The SuperSpeed reference is dropped when the HiSpeed port in * this relationship suspends, i.e. when it is safe to allow a * SuperSpeed connection to drop since there is no risk of a * device degrading to its powered-off HiSpeed connection. 
* * Also, drop the HiSpeed ref taken above. */ pm_runtime_get_sync(&ss_port->dev); pm_runtime_put(&hs_port->dev); return 0; } static void link_peers_report(struct usb_port *left, struct usb_port *right) { int rc; rc = link_peers(left, right); if (rc == 0) { dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev)); } else { dev_dbg(&left->dev, "failed to peer to %s (%d)\n", dev_name(&right->dev), rc); pr_warn_once("usb: port power management may be unreliable\n"); usb_port_block_power_off = 1; } } static void unlink_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; WARN(right->peer != left || left->peer != right, "%s and %s are not peers?\n", dev_name(&left->dev), dev_name(&right->dev)); /* * We wake the HiSpeed port to make sure we don't race its * usb_port_runtime_resume() event which takes a SuperSpeed ref * when ->peer is !NULL. */ if (left->is_superspeed) { ss_port = left; hs_port = right; } else { ss_port = right; hs_port = left; } pm_runtime_get_sync(&hs_port->dev); sysfs_remove_link(&left->dev.kobj, "peer"); right->peer = NULL; sysfs_remove_link(&right->dev.kobj, "peer"); left->peer = NULL; /* Drop the SuperSpeed ref held on behalf of the active HiSpeed port */ pm_runtime_put(&ss_port->dev); /* Drop the ref taken above */ pm_runtime_put(&hs_port->dev); } /* * For each usb hub device in the system check to see if it is in the * peer domain of the given port_dev, and if it is check to see if it * has a port that matches the given port by location */ static int match_location(struct usb_device *peer_hdev, void *p) { int port1; struct usb_hcd *hcd, *peer_hcd; struct usb_port *port_dev = p, *peer; struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); peer_hcd = bus_to_hcd(peer_hdev->bus); /* peer_hcd is provisional until we verify it against the known peer */ if (peer_hcd != hcd->shared_hcd) return 0; for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; if (peer && peer->connect_type != USB_PORT_NOT_USED && peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } } return 0; } /* * Find the peer port either via explicit platform firmware "location" * data, the peer hcd for root hubs, or the upstream peer relationship * for all other hubs. 
*/ static void find_and_link_peer(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1], *peer; struct usb_device *hdev = hub->hdev; struct usb_device *peer_hdev; struct usb_hub *peer_hub; /* * If location data is available then we can only peer this port * by a location match, not the default peer (lest we create a * situation where we need to go back and undo a default peering * when the port is later peered by location data) */ if (port_dev->location) { /* we link the peer in match_location() if found */ usb_for_each_dev(port_dev, match_location); return; } else if (!hdev->parent) { struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_hcd *peer_hcd = hcd->shared_hcd; if (!peer_hcd) return; peer_hdev = peer_hcd->self.root_hub; } else { struct usb_port *upstream; struct usb_device *parent = hdev->parent; struct usb_hub *parent_hub = usb_hub_to_struct_hub(parent); if (!parent_hub) return; upstream = parent_hub->ports[hdev->portnum - 1]; if (!upstream || !upstream->peer) return; peer_hdev = upstream->peer->child; } peer_hub = usb_hub_to_struct_hub(peer_hdev); if (!peer_hub || port1 > peer_hdev->maxchild) return; /* * we found a valid default peer, last check is to make sure it * does not have location data */ peer = peer_hub->ports[port1 - 1]; if (peer && peer->location == 0) link_peers_report(port_dev, peer); } static int connector_bind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); int ret; ret = sysfs_create_link(&dev->kobj, &connector->kobj, "connector"); if (ret) return ret; ret = sysfs_create_link(&connector->kobj, &dev->kobj, dev_name(dev)); if (ret) { sysfs_remove_link(&dev->kobj, "connector"); return ret; } port_dev->connector = data; /* * If there is already USB device connected to the port, letting the * Type-C connector know about it immediately. 
*/ if (port_dev->child) typec_attach(port_dev->connector, &port_dev->child->dev); return 0; } static void connector_unbind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); sysfs_remove_link(&connector->kobj, dev_name(dev)); sysfs_remove_link(&dev->kobj, "connector"); port_dev->connector = NULL; } static const struct component_ops connector_ops = { .bind = connector_bind, .unbind = connector_unbind, }; int usb_hub_create_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev; struct usb_device *hdev = hub->hdev; int retval; port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL); if (!port_dev) return -ENOMEM; port_dev->req = kzalloc(sizeof(*(port_dev->req)), GFP_KERNEL); if (!port_dev->req) { kfree(port_dev); return -ENOMEM; } port_dev->connect_type = usb_of_get_connect_type(hdev, port1); hub->ports[port1 - 1] = port_dev; port_dev->portnum = port1; set_bit(port1, hub->power_bits); port_dev->dev.parent = hub->intfdev; if (hub_is_superspeed(hdev)) { port_dev->is_superspeed = 1; port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; port_dev->dev.groups = port_dev_usb3_group; } else port_dev->dev.groups = port_dev_group; port_dev->dev.type = &usb_port_device_type; port_dev->dev.driver = &usb_port_driver; dev_set_name(&port_dev->dev, "%s-port%d", dev_name(&hub->hdev->dev), port1); mutex_init(&port_dev->status_lock); retval = device_register(&port_dev->dev); if (retval) { put_device(&port_dev->dev); return retval; } port_dev->state_kn = sysfs_get_dirent(port_dev->dev.kobj.sd, "state"); if (!port_dev->state_kn) { dev_err(&port_dev->dev, "failed to sysfs_get_dirent 'state'\n"); retval = -ENODEV; goto err_unregister; } /* Set default policy of port-poweroff disabled. */ retval = dev_pm_qos_add_request(&port_dev->dev, port_dev->req, DEV_PM_QOS_FLAGS, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { goto err_put_kn; } retval = component_add(&port_dev->dev, &connector_ops); if (retval) { dev_warn(&port_dev->dev, "failed to add component\n"); goto err_put_kn; } find_and_link_peer(hub, port1); /* * Enable runtime pm and hold a refernce that hub_configure() * will drop once the PM_QOS_NO_POWER_OFF flag state has been set * and the hub has been fully registered (hdev->maxchild set). */ pm_runtime_set_active(&port_dev->dev); pm_runtime_get_noresume(&port_dev->dev); pm_runtime_enable(&port_dev->dev); device_enable_async_suspend(&port_dev->dev); /* * Keep hidden the ability to enable port-poweroff if the hub * does not support power switching. */ if (!hub_is_port_power_switchable(hub)) return 0; /* Attempt to let userspace take over the policy. */ retval = dev_pm_qos_expose_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { dev_warn(&port_dev->dev, "failed to expose pm_qos_no_poweroff\n"); return 0; } /* Userspace owns the policy, drop the kernel 'no_poweroff' request. */ retval = dev_pm_qos_remove_request(port_dev->req); if (retval >= 0) { kfree(port_dev->req); port_dev->req = NULL; } return 0; err_put_kn: sysfs_put(port_dev->state_kn); err_unregister: device_unregister(&port_dev->dev); return retval; } void usb_hub_remove_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_port *peer; peer = port_dev->peer; if (peer) unlink_peers(port_dev, peer); component_del(&port_dev->dev, &connector_ops); sysfs_put(port_dev->state_kn); device_unregister(&port_dev->dev); }
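As a usage sketch for the usb3_lpm_permit attribute registered above: the store handler accepts the strings "0", "u1", "u2" and "u1_u2" and, if a device is connected to the port, disables and re-enables LPM so the new setting takes effect. The userspace example below is hypothetical; the sysfs path depends on the actual hub and port topology.

#include <stdio.h>

int main(void)
{
	/* hypothetical example path; substitute the real hub/port name */
	const char *attr =
		"/sys/bus/usb/devices/usb2/2-0:1.0/usb2-port1/usb3_lpm_permit";
	FILE *f = fopen(attr, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* permit both U1 and U2 link power management on this port */
	if (fputs("u1_u2", f) == EOF)
		perror("fputs");
	fclose(f);
	return 0;
}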
// SPDX-License-Identifier: GPL-2.0-or-later #include <linux/cfm_bridge.h> #include <uapi/linux/cfm_bridge.h> #include "br_private_cfm.h"
static struct br_cfm_mep *br_mep_find(struct net_bridge *br, u32 instance) { struct br_cfm_mep *mep; hlist_for_each_entry(mep, &br->mep_list, head) if (mep->instance == instance) return mep; return NULL; } static struct br_cfm_mep *br_mep_find_ifindex(struct net_bridge *br, u32 ifindex) { struct br_cfm_mep *mep; hlist_for_each_entry_rcu(mep, &br->mep_list, head, lockdep_rtnl_is_held()) if (mep->create.ifindex == ifindex) return mep; return NULL; } static struct br_cfm_peer_mep *br_peer_mep_find(struct br_cfm_mep *mep, u32 mepid) { struct br_cfm_peer_mep *peer_mep; hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head, lockdep_rtnl_is_held()) if (peer_mep->mepid == mepid) return peer_mep; return NULL; } static struct net_bridge_port *br_mep_get_port(struct net_bridge *br, u32 ifindex) { struct net_bridge_port *port; list_for_each_entry(port, &br->port_list, list) if (port->dev->ifindex == ifindex) return port; return NULL; } /* Calculate the CCM interval in us. */ static u32 interval_to_us(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 3300; case BR_CFM_CCM_INTERVAL_10_MS: return 10 * 1000; case BR_CFM_CCM_INTERVAL_100_MS: return 100 * 1000; case BR_CFM_CCM_INTERVAL_1_SEC: return 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_SEC: return 10 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_1_MIN: return 60 * 1000 * 1000; case BR_CFM_CCM_INTERVAL_10_MIN: return 10 * 60 * 1000 * 1000; } return 0; } /* Convert the interface interval to CCM PDU value. */ static u32 interval_to_pdu(enum br_cfm_ccm_interval interval) { switch (interval) { case BR_CFM_CCM_INTERVAL_NONE: return 0; case BR_CFM_CCM_INTERVAL_3_3_MS: return 1; case BR_CFM_CCM_INTERVAL_10_MS: return 2; case BR_CFM_CCM_INTERVAL_100_MS: return 3; case BR_CFM_CCM_INTERVAL_1_SEC: return 4; case BR_CFM_CCM_INTERVAL_10_SEC: return 5; case BR_CFM_CCM_INTERVAL_1_MIN: return 6; case BR_CFM_CCM_INTERVAL_10_MIN: return 7; } return 0; } /* Convert the CCM PDU value to interval on interface. */ static u32 pdu_to_interval(u32 value) { switch (value) { case 0: return BR_CFM_CCM_INTERVAL_NONE; case 1: return BR_CFM_CCM_INTERVAL_3_3_MS; case 2: return BR_CFM_CCM_INTERVAL_10_MS; case 3: return BR_CFM_CCM_INTERVAL_100_MS; case 4: return BR_CFM_CCM_INTERVAL_1_SEC; case 5: return BR_CFM_CCM_INTERVAL_10_SEC; case 6: return BR_CFM_CCM_INTERVAL_1_MIN; case 7: return BR_CFM_CCM_INTERVAL_10_MIN; } return BR_CFM_CCM_INTERVAL_NONE; } static void ccm_rx_timer_start(struct br_cfm_peer_mep *peer_mep) { u32 interval_us; interval_us = interval_to_us(peer_mep->mep->cc_config.exp_interval); /* Function ccm_rx_dwork must be called with 1/4 * of the configured CC 'expected_interval' * in order to detect CCM defect after 3.25 interval. 
*/ queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork, usecs_to_jiffies(interval_us / 4)); } static void br_cfm_notify(int event, const struct net_bridge_port *port) { u32 filter = RTEXT_FILTER_CFM_STATUS; br_info_notify(event, port->br, NULL, filter); } static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep) { memset(&peer_mep->cc_status, 0, sizeof(peer_mep->cc_status)); peer_mep->ccm_rx_count_miss = 0; ccm_rx_timer_start(peer_mep); } static void cc_peer_disable(struct br_cfm_peer_mep *peer_mep) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); } static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep, const struct br_cfm_cc_ccm_tx_info *const tx_info) { struct br_cfm_common_hdr *common_hdr; struct net_bridge_port *b_port; struct br_cfm_maid *maid; u8 *itu_reserved, *e_tlv; struct ethhdr *eth_hdr; struct sk_buff *skb; __be32 *status_tlv; __be32 *snumber; __be16 *mepid; skb = dev_alloc_skb(CFM_CCM_MAX_FRAME_LENGTH); if (!skb) return NULL; rcu_read_lock(); b_port = rcu_dereference(mep->b_port); if (!b_port) { kfree_skb(skb); rcu_read_unlock(); return NULL; } skb->dev = b_port->dev; rcu_read_unlock(); /* The device cannot be deleted until the work_queue functions has * completed. This function is called from ccm_tx_work_expired() * that is a work_queue functions. */ skb->protocol = htons(ETH_P_CFM); skb->priority = CFM_FRAME_PRIO; /* Ethernet header */ eth_hdr = skb_put(skb, sizeof(*eth_hdr)); ether_addr_copy(eth_hdr->h_dest, tx_info->dmac.addr); ether_addr_copy(eth_hdr->h_source, mep->config.unicast_mac.addr); eth_hdr->h_proto = htons(ETH_P_CFM); /* Common CFM Header */ common_hdr = skb_put(skb, sizeof(*common_hdr)); common_hdr->mdlevel_version = mep->config.mdlevel << 5; common_hdr->opcode = BR_CFM_OPCODE_CCM; common_hdr->flags = (mep->rdi << 7) | interval_to_pdu(mep->cc_config.exp_interval); common_hdr->tlv_offset = CFM_CCM_TLV_OFFSET; /* Sequence number */ snumber = skb_put(skb, sizeof(*snumber)); if (tx_info->seq_no_update) { *snumber = cpu_to_be32(mep->ccm_tx_snumber); mep->ccm_tx_snumber += 1; } else { *snumber = 0; } mepid = skb_put(skb, sizeof(*mepid)); *mepid = cpu_to_be16((u16)mep->config.mepid); maid = skb_put(skb, sizeof(*maid)); memcpy(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data)); /* ITU reserved (CFM_CCM_ITU_RESERVED_SIZE octets) */ itu_reserved = skb_put(skb, CFM_CCM_ITU_RESERVED_SIZE); memset(itu_reserved, 0, CFM_CCM_ITU_RESERVED_SIZE); /* Generel CFM TLV format: * TLV type: one byte * TLV value length: two bytes * TLV value: 'TLV value length' bytes */ /* Port status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->port_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_PORT_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->port_tlv_value & 0xFF)); } /* Interface status TLV. The value length is 1. Total of 4 bytes. */ if (tx_info->if_tlv) { status_tlv = skb_put(skb, sizeof(*status_tlv)); *status_tlv = cpu_to_be32((CFM_IF_STATUS_TLV_TYPE << 24) | (1 << 8) | /* Value length */ (tx_info->if_tlv_value & 0xFF)); } /* End TLV */ e_tlv = skb_put(skb, sizeof(*e_tlv)); *e_tlv = CFM_ENDE_TLV_TYPE; return skb; } static void ccm_frame_tx(struct sk_buff *skb) { skb_reset_network_header(skb); dev_queue_xmit(skb); } /* This function is called with the configured CC 'expected_interval' * in order to drive CCM transmission when enabled. 
*/ static void ccm_tx_work_expired(struct work_struct *work) { struct delayed_work *del_work; struct br_cfm_mep *mep; struct sk_buff *skb; u32 interval_us; del_work = to_delayed_work(work); mep = container_of(del_work, struct br_cfm_mep, ccm_tx_dwork); if (time_before_eq(mep->ccm_tx_end, jiffies)) { /* Transmission period has ended */ mep->cc_ccm_tx_info.period = 0; return; } skb = ccm_frame_build(mep, &mep->cc_ccm_tx_info); if (skb) ccm_frame_tx(skb); interval_us = interval_to_us(mep->cc_config.exp_interval); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, usecs_to_jiffies(interval_us)); } /* This function is called with 1/4 of the configured CC 'expected_interval' * in order to detect a CCM defect after 3.25 intervals. */ static void ccm_rx_work_expired(struct work_struct *work) { struct br_cfm_peer_mep *peer_mep; struct net_bridge_port *b_port; struct delayed_work *del_work; del_work = to_delayed_work(work); peer_mep = container_of(del_work, struct br_cfm_peer_mep, ccm_rx_dwork); /* After 13 counts (4 * 3.25) 3.25 intervals have expired */ if (peer_mep->ccm_rx_count_miss < 13) { /* 3.25 intervals have NOT expired without CCM reception */ peer_mep->ccm_rx_count_miss++; /* Start timer again */ ccm_rx_timer_start(peer_mep); } else { /* 3.25 intervals have expired without CCM reception. * CCM defect detected */ peer_mep->cc_status.ccm_defect = true; /* Change in CCM defect status - notify */ rcu_read_lock(); b_port = rcu_dereference(peer_mep->mep->b_port); if (b_port) br_cfm_notify(RTM_NEWLINK, b_port); rcu_read_unlock(); } } static u32 ccm_tlv_extract(struct sk_buff *skb, u32 index, struct br_cfm_peer_mep *peer_mep) { __be32 *s_tlv; __be32 _s_tlv; u32 h_s_tlv; u8 *e_tlv; u8 _e_tlv; e_tlv = skb_header_pointer(skb, index, sizeof(_e_tlv), &_e_tlv); if (!e_tlv) return 0; /* TLV is present - get the status TLV */ s_tlv = skb_header_pointer(skb, index, sizeof(_s_tlv), &_s_tlv); if (!s_tlv) return 0; h_s_tlv = ntohl(*s_tlv); if ((h_s_tlv >> 24) == CFM_IF_STATUS_TLV_TYPE) { /* Interface status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.if_tlv_value = (h_s_tlv & 0xFF); } if ((h_s_tlv >> 24) == CFM_PORT_STATUS_TLV_TYPE) { /* Port status TLV */ peer_mep->cc_status.tlv_seen = true; peer_mep->cc_status.port_tlv_value = (h_s_tlv & 0xFF); } /* The Sender ID TLV is not handled */ /* The Organization-Specific TLV is not handled */ /* Return the length of this tlv.
* This is the length of the value field plus 3 bytes for size of type * field and length field */ return ((h_s_tlv >> 8) & 0xFFFF) + 3; } /* note: already called with rcu_read_lock */ static int br_cfm_frame_rx(struct net_bridge_port *port, struct sk_buff *skb) { u32 mdlevel, interval, size, index, max; const struct br_cfm_common_hdr *hdr; struct br_cfm_peer_mep *peer_mep; const struct br_cfm_maid *maid; struct br_cfm_common_hdr _hdr; struct br_cfm_maid _maid; struct br_cfm_mep *mep; struct net_bridge *br; __be32 *snumber; __be32 _snumber; __be16 *mepid; __be16 _mepid; if (port->state == BR_STATE_DISABLED) return 0; hdr = skb_header_pointer(skb, 0, sizeof(_hdr), &_hdr); if (!hdr) return 1; br = port->br; mep = br_mep_find_ifindex(br, port->dev->ifindex); if (unlikely(!mep)) /* No MEP on this port - must be forwarded */ return 0; mdlevel = hdr->mdlevel_version >> 5; if (mdlevel > mep->config.mdlevel) /* The level is above this MEP level - must be forwarded */ return 0; if ((hdr->mdlevel_version & 0x1F) != 0) { /* Invalid version */ mep->status.version_unexp_seen = true; return 1; } if (mdlevel < mep->config.mdlevel) { /* The level is below this MEP level */ mep->status.rx_level_low_seen = true; return 1; } if (hdr->opcode == BR_CFM_OPCODE_CCM) { /* CCM PDU received. */ /* MA ID is after common header + sequence number + MEP ID */ maid = skb_header_pointer(skb, CFM_CCM_PDU_MAID_OFFSET, sizeof(_maid), &_maid); if (!maid) return 1; if (memcmp(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data))) /* MA ID not as expected */ return 1; /* MEP ID is after common header + sequence number */ mepid = skb_header_pointer(skb, CFM_CCM_PDU_MEPID_OFFSET, sizeof(_mepid), &_mepid); if (!mepid) return 1; peer_mep = br_peer_mep_find(mep, (u32)ntohs(*mepid)); if (!peer_mep) return 1; /* Interval is in common header flags */ interval = hdr->flags & 0x07; if (mep->cc_config.exp_interval != pdu_to_interval(interval)) /* Interval not as expected */ return 1; /* A valid CCM frame is received */ if (peer_mep->cc_status.ccm_defect) { peer_mep->cc_status.ccm_defect = false; /* Change in CCM defect status - notify */ br_cfm_notify(RTM_NEWLINK, port); /* Start CCM RX timer */ ccm_rx_timer_start(peer_mep); } peer_mep->cc_status.seen = true; peer_mep->ccm_rx_count_miss = 0; /* RDI is in common header flags */ peer_mep->cc_status.rdi = (hdr->flags & 0x80) ? 
true : false; /* Sequence number is after common header */ snumber = skb_header_pointer(skb, CFM_CCM_PDU_SEQNR_OFFSET, sizeof(_snumber), &_snumber); if (!snumber) return 1; if (ntohl(*snumber) != (mep->ccm_rx_snumber + 1)) /* Unexpected sequence number */ peer_mep->cc_status.seq_unexp_seen = true; mep->ccm_rx_snumber = ntohl(*snumber); /* TLV end is after common header + sequence number + MEP ID + * MA ID + ITU reserved */ index = CFM_CCM_PDU_TLV_OFFSET; max = 0; do { /* Handle all TLVs */ size = ccm_tlv_extract(skb, index, peer_mep); index += size; max += 1; } while (size != 0 && max < 4); /* Max four TLVs possible */ return 1; } mep->status.opcode_unexp_seen = true; return 1; } static struct br_frame_type cfm_frame_type __read_mostly = { .type = cpu_to_be16(ETH_P_CFM), .frame_handler = br_cfm_frame_rx, }; int br_cfm_mep_create(struct net_bridge *br, const u32 instance, struct br_cfm_mep_create *const create, struct netlink_ext_ack *extack) { struct net_bridge_port *p; struct br_cfm_mep *mep; ASSERT_RTNL(); if (create->domain == BR_CFM_VLAN) { NL_SET_ERR_MSG_MOD(extack, "VLAN domain not supported"); return -EINVAL; } if (create->domain != BR_CFM_PORT) { NL_SET_ERR_MSG_MOD(extack, "Invalid domain value"); return -EINVAL; } if (create->direction == BR_CFM_MEP_DIRECTION_UP) { NL_SET_ERR_MSG_MOD(extack, "Up-MEP not supported"); return -EINVAL; } if (create->direction != BR_CFM_MEP_DIRECTION_DOWN) { NL_SET_ERR_MSG_MOD(extack, "Invalid direction value"); return -EINVAL; } p = br_mep_get_port(br, create->ifindex); if (!p) { NL_SET_ERR_MSG_MOD(extack, "Port is not related to bridge"); return -EINVAL; } mep = br_mep_find(br, instance); if (mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance already exists"); return -EEXIST; } /* In PORT domain only one instance can be created per port */ if (create->domain == BR_CFM_PORT) { mep = br_mep_find_ifindex(br, create->ifindex); if (mep) { NL_SET_ERR_MSG_MOD(extack, "Only one Port MEP on a port allowed"); return -EINVAL; } } mep = kzalloc(sizeof(*mep), GFP_KERNEL); if (!mep) return -ENOMEM; mep->create = *create; mep->instance = instance; rcu_assign_pointer(mep->b_port, p); INIT_HLIST_HEAD(&mep->peer_mep_list); INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired); if (hlist_empty(&br->mep_list)) br_add_frame(br, &cfm_frame_type); hlist_add_tail_rcu(&mep->head, &br->mep_list); return 0; } static void mep_delete_implementation(struct net_bridge *br, struct br_cfm_mep *mep) { struct br_cfm_peer_mep *peer_mep; struct hlist_node *n_store; ASSERT_RTNL(); /* Empty and free peer MEP list */ hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); } cancel_delayed_work_sync(&mep->ccm_tx_dwork); RCU_INIT_POINTER(mep->b_port, NULL); hlist_del_rcu(&mep->head); kfree_rcu(mep, rcu); if (hlist_empty(&br->mep_list)) br_del_frame(br, &cfm_frame_type); } int br_cfm_mep_delete(struct net_bridge *br, const u32 instance, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep_delete_implementation(br, mep); return 0; } int br_cfm_mep_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_mep_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); 
return -ENOENT; } mep->config = *config; return 0; } int br_cfm_cc_config_set(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_config *const config, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } /* Check for no change in configuration */ if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0) return 0; if (config->enable && !mep->cc_config.enable) /* CC is enabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_enable(peer_mep); if (!config->enable && mep->cc_config.enable) /* CC is disabled */ hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) cc_peer_disable(peer_mep); mep->cc_config = *config; mep->ccm_rx_snumber = 0; mep->ccm_tx_snumber = 1; return 0; } int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID already exists"); return -EEXIST; } peer_mep = kzalloc(sizeof(*peer_mep), GFP_KERNEL); if (!peer_mep) return -ENOMEM; peer_mep->mepid = mepid; peer_mep->mep = mep; INIT_DELAYED_WORK(&peer_mep->ccm_rx_dwork, ccm_rx_work_expired); if (mep->cc_config.enable) cc_peer_enable(peer_mep); hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list); return 0; } int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, u32 mepid, struct netlink_ext_ack *extack) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } peer_mep = br_peer_mep_find(mep, mepid); if (!peer_mep) { NL_SET_ERR_MSG_MOD(extack, "Peer MEP-ID does not exists"); return -ENOENT; } cc_peer_disable(peer_mep); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); return 0; } int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance, const bool rdi, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } mep->rdi = rdi; return 0; } int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance, const struct br_cfm_cc_ccm_tx_info *const tx_info, struct netlink_ext_ack *extack) { struct br_cfm_mep *mep; ASSERT_RTNL(); mep = br_mep_find(br, instance); if (!mep) { NL_SET_ERR_MSG_MOD(extack, "MEP instance does not exists"); return -ENOENT; } if (memcmp(tx_info, &mep->cc_ccm_tx_info, sizeof(*tx_info)) == 0) { /* No change in tx_info. 
*/ if (mep->cc_ccm_tx_info.period == 0) /* Transmission is not enabled - just return */ return 0; /* Transmission is ongoing, the end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); return 0; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period == 0) /* Some change in info and transmission is not ongoing */ goto save; if (tx_info->period != 0 && mep->cc_ccm_tx_info.period != 0) { /* Some change in info and transmission is ongoing * The end time is recalculated */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); goto save; } if (tx_info->period == 0 && mep->cc_ccm_tx_info.period != 0) { cancel_delayed_work_sync(&mep->ccm_tx_dwork); goto save; } /* Start delayed work to transmit CCM frames. It is done with zero delay * to send first frame immediately */ mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0); save: mep->cc_ccm_tx_info = *tx_info; return 0; } int br_cfm_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) *count += 1; rcu_read_unlock(); return 0; } int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; *count = 0; rcu_read_lock(); hlist_for_each_entry_rcu(mep, &br->mep_list, head) hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) *count += 1; rcu_read_unlock(); return 0; } bool br_cfm_created(struct net_bridge *br) { return !hlist_empty(&br->mep_list); } /* Deletes the CFM instances on a specific bridge port */ void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port) { struct hlist_node *n_store; struct br_cfm_mep *mep; ASSERT_RTNL(); hlist_for_each_entry_safe(mep, n_store, &br->mep_list, head) if (mep->create.ifindex == port->dev->ifindex) mep_delete_implementation(br, mep); }
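To make the continuity-check timing above concrete: ccm_rx_timer_start() arms the receive work at one quarter of the expected CCM interval, and ccm_rx_work_expired() declares a defect after 13 missed runs, which is roughly 3.25 expected intervals without a received CCM PDU. The sketch below only reproduces that arithmetic for a few of the intervals handled by interval_to_us(); it is not part of the bridge code.

#include <stdio.h>

int main(void)
{
	/* expected CCM intervals in microseconds, as in interval_to_us() */
	const unsigned int interval_us[] = { 3300, 10000, 100000, 1000000 };
	const char *name[] = { "3.3ms", "10ms", "100ms", "1s" };

	for (unsigned int i = 0; i < 4; i++) {
		unsigned int poll_us = interval_us[i] / 4;  /* rx work period */
		unsigned int detect_us = poll_us * 13;      /* 13 misses => defect */

		printf("%-6s poll every %u us, defect after ~%u us\n",
		       name[i], poll_us, detect_us);
	}
	return 0;
}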
/* SPDX-License-Identifier: GPL-2.0 */ /* * fscrypt_private.h * * Copyright (C) 2015, Google, Inc. * * Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar. * Heavily modified since then. */ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H #include <linux/fscrypt.h> #include <linux/siphash.h> #include <crypto/hash.h> #include <linux/blk-crypto.h> #define CONST_STRLEN(str) (sizeof(str) - 1) #define FSCRYPT_FILE_NONCE_SIZE 16 /* * Minimum size of an fscrypt master key. Note: a longer key will be required * if ciphers with a 256-bit security strength are used. This is just the * absolute minimum, which applies when only 128-bit encryption is used.
*/ #define FSCRYPT_MIN_KEY_SIZE 16 #define FSCRYPT_CONTEXT_V1 1 #define FSCRYPT_CONTEXT_V2 2 /* Keep this in sync with include/uapi/linux/fscrypt.h */ #define FSCRYPT_MODE_MAX FSCRYPT_MODE_AES_256_HCTR2 struct fscrypt_context_v1 { u8 version; /* FSCRYPT_CONTEXT_V1 */ u8 contents_encryption_mode; u8 filenames_encryption_mode; u8 flags; u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; struct fscrypt_context_v2 { u8 version; /* FSCRYPT_CONTEXT_V2 */ u8 contents_encryption_mode; u8 filenames_encryption_mode; u8 flags; u8 log2_data_unit_size; u8 __reserved[3]; u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; /* * fscrypt_context - the encryption context of an inode * * This is the on-disk equivalent of an fscrypt_policy, stored alongside each * encrypted file usually in a hidden extended attribute. It contains the * fields from the fscrypt_policy, in order to identify the encryption algorithm * and key with which the file is encrypted. It also contains a nonce that was * randomly generated by fscrypt itself; this is used as KDF input or as a tweak * to cause different files to be encrypted differently. */ union fscrypt_context { u8 version; struct fscrypt_context_v1 v1; struct fscrypt_context_v2 v2; }; /* * Return the size expected for the given fscrypt_context based on its version * number, or 0 if the context version is unrecognized. */ static inline int fscrypt_context_size(const union fscrypt_context *ctx) { switch (ctx->version) { case FSCRYPT_CONTEXT_V1: BUILD_BUG_ON(sizeof(ctx->v1) != 28); return sizeof(ctx->v1); case FSCRYPT_CONTEXT_V2: BUILD_BUG_ON(sizeof(ctx->v2) != 40); return sizeof(ctx->v2); } return 0; } /* Check whether an fscrypt_context has a recognized version number and size */ static inline bool fscrypt_context_is_valid(const union fscrypt_context *ctx, int ctx_size) { return ctx_size >= 1 && ctx_size == fscrypt_context_size(ctx); } /* Retrieve the context's nonce, assuming the context was already validated */ static inline const u8 *fscrypt_context_nonce(const union fscrypt_context *ctx) { switch (ctx->version) { case FSCRYPT_CONTEXT_V1: return ctx->v1.nonce; case FSCRYPT_CONTEXT_V2: return ctx->v2.nonce; } WARN_ON_ONCE(1); return NULL; } union fscrypt_policy { u8 version; struct fscrypt_policy_v1 v1; struct fscrypt_policy_v2 v2; }; /* * Return the size expected for the given fscrypt_policy based on its version * number, or 0 if the policy version is unrecognized. 
*/ static inline int fscrypt_policy_size(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return sizeof(policy->v1); case FSCRYPT_POLICY_V2: return sizeof(policy->v2); } return 0; } /* Return the contents encryption mode of a valid encryption policy */ static inline u8 fscrypt_policy_contents_mode(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.contents_encryption_mode; case FSCRYPT_POLICY_V2: return policy->v2.contents_encryption_mode; } BUG(); } /* Return the filenames encryption mode of a valid encryption policy */ static inline u8 fscrypt_policy_fnames_mode(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.filenames_encryption_mode; case FSCRYPT_POLICY_V2: return policy->v2.filenames_encryption_mode; } BUG(); } /* Return the flags (FSCRYPT_POLICY_FLAG*) of a valid encryption policy */ static inline u8 fscrypt_policy_flags(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.flags; case FSCRYPT_POLICY_V2: return policy->v2.flags; } BUG(); } static inline int fscrypt_policy_v2_du_bits(const struct fscrypt_policy_v2 *policy, const struct inode *inode) { return policy->log2_data_unit_size ?: inode->i_blkbits; } static inline int fscrypt_policy_du_bits(const union fscrypt_policy *policy, const struct inode *inode) { switch (policy->version) { case FSCRYPT_POLICY_V1: return inode->i_blkbits; case FSCRYPT_POLICY_V2: return fscrypt_policy_v2_du_bits(&policy->v2, inode); } BUG(); } /* * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. */ struct fscrypt_symlink_data { __le16 len; char encrypted_path[]; } __packed; /** * struct fscrypt_prepared_key - a key prepared for actual encryption/decryption * @tfm: crypto API transform object * @blk_key: key for blk-crypto * * Normally only one of the fields will be non-NULL. */ struct fscrypt_prepared_key { struct crypto_skcipher *tfm; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT struct blk_crypto_key *blk_key; #endif }; /* * fscrypt_inode_info - the "encryption key" for an inode * * When an encrypted file's key is made available, an instance of this struct is * allocated and stored in ->i_crypt_info. Once created, it remains until the * inode is evicted. */ struct fscrypt_inode_info { /* The key in a form prepared for actual encryption/decryption */ struct fscrypt_prepared_key ci_enc_key; /* True if ci_enc_key should be freed when this struct is freed */ u8 ci_owns_key : 1; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT /* * True if this inode will use inline encryption (blk-crypto) instead of * the traditional filesystem-layer encryption. */ u8 ci_inlinecrypt : 1; #endif /* True if ci_dirhash_key is initialized */ u8 ci_dirhash_key_initialized : 1; /* * log2 of the data unit size (granularity of contents encryption) of * this file. This is computable from ci_policy and ci_inode but is * cached here for efficiency. Only used for regular files. */ u8 ci_data_unit_bits; /* Cached value: log2 of number of data units per FS block */ u8 ci_data_units_per_block_bits; /* Hashed inode number. Only set for IV_INO_LBLK_32 */ u32 ci_hashed_ino; /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. 
*/ struct fscrypt_mode *ci_mode; /* Back-pointer to the inode */ struct inode *ci_inode; /* * The master key with which this inode was unlocked (decrypted). This * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. */ struct fscrypt_master_key *ci_master_key; /* * Link in list of inodes that were unlocked with the master key. * Only used when ->ci_master_key is set. */ struct list_head ci_master_key_link; /* * If non-NULL, then encryption is done using the master key directly * and ci_enc_key will equal ci_direct_key->dk_key. */ struct fscrypt_direct_key *ci_direct_key; /* * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 * key. This is only set for directories that use a keyed dirhash over * the plaintext filenames -- currently just casefolded directories. */ siphash_key_t ci_dirhash_key; /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; /* This inode's nonce, copied from the fscrypt_context */ u8 ci_nonce[FSCRYPT_FILE_NONCE_SIZE]; }; typedef enum { FS_DECRYPT = 0, FS_ENCRYPT, } fscrypt_direction_t; /* crypto.c */ extern struct kmem_cache *fscrypt_inode_info_cachep; int fscrypt_initialize(struct super_block *sb); int fscrypt_crypt_data_unit(const struct fscrypt_inode_info *ci, fscrypt_direction_t rw, u64 index, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags); struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); #define fscrypt_warn(inode, fmt, ...) \ fscrypt_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__) #define fscrypt_err(inode, fmt, ...) \ fscrypt_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__) #define FSCRYPT_MAX_IV_SIZE 32 union fscrypt_iv { struct { /* zero-based index of data unit within the file */ __le64 index; /* per-file nonce; only set in DIRECT_KEY mode */ u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; u8 raw[FSCRYPT_MAX_IV_SIZE]; __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)]; }; void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, const struct fscrypt_inode_info *ci); /* * Return the number of bits used by the maximum file data unit index that is * possible on the given filesystem, using the given log2 data unit size. */ static inline int fscrypt_max_file_dun_bits(const struct super_block *sb, int du_bits) { return fls64(sb->s_maxbytes - 1) - du_bits; } /* fname.c */ bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); /* hkdf.c */ struct fscrypt_hkdf { struct crypto_shash *hmac_tfm; }; int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, unsigned int master_key_size); /* * The list of contexts in which fscrypt uses HKDF. These values are used as * the first byte of the HKDF application-specific info string to guarantee that * info strings are never repeated between contexts. This ensures that all HKDF * outputs are unique and cryptographically isolated, i.e. knowledge of one * output doesn't reveal another. 
*/ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 /* info=<empty> */ #define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 /* info=file_nonce */ #define HKDF_CONTEXT_DIRECT_KEY 3 /* info=mode_num */ #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 /* info=mode_num||fs_uuid */ #define HKDF_CONTEXT_DIRHASH_KEY 5 /* info=file_nonce */ #define HKDF_CONTEXT_IV_INO_LBLK_32_KEY 6 /* info=mode_num||fs_uuid */ #define HKDF_CONTEXT_INODE_HASH_KEY 7 /* info=<empty> */ int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci); static inline bool fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return ci->ci_inlinecrypt; } int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci); void fscrypt_destroy_inline_crypt_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); /* * Check whether the crypto transform or blk-crypto key has been allocated in * @prep_key, depending on which encryption implementation the file will use. */ static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_inode_info *ci) { /* * The two smp_load_acquire()'s here pair with the smp_store_release()'s * in fscrypt_prepare_inline_crypt_key() and fscrypt_prepare_key(). * I.e., in some cases (namely, if this prep_key is a per-mode * encryption key) another task can publish blk_key or tfm concurrently, * executing a RELEASE barrier. We need to use smp_load_acquire() here * to safely ACQUIRE the memory the other task published. */ if (fscrypt_using_inline_encryption(ci)) return smp_load_acquire(&prep_key->blk_key) != NULL; return smp_load_acquire(&prep_key->tfm) != NULL; } #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci) { return 0; } static inline bool fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return false; } static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci) { WARN_ON_ONCE(1); return -EOPNOTSUPP; } static inline void fscrypt_destroy_inline_crypt_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key) { } static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_inode_info *ci) { return smp_load_acquire(&prep_key->tfm) != NULL; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /* keyring.c */ /* * fscrypt_master_key_secret - secret key material of an in-use master key */ struct fscrypt_master_key_secret { /* * For v2 policy keys: HKDF context keyed by this master key. * For v1 policy keys: not set (hkdf.hmac_tfm == NULL). */ struct fscrypt_hkdf hkdf; /* * Size of the raw key in bytes. This remains set even if ->raw was * zeroized due to no longer being needed. I.e. we still remember the * size of the key even if we don't need to remember the key itself. */ u32 size; /* For v1 policy keys: the raw key. Wiped for v2 policy keys. */ u8 raw[FSCRYPT_MAX_KEY_SIZE]; } __randomize_layout; /* * fscrypt_master_key - an in-use master key * * This represents a master encryption key which has been added to the * filesystem. 
There are three high-level states that a key can be in: * * FSCRYPT_KEY_STATUS_PRESENT * Key is fully usable; it can be used to unlock inodes that are encrypted * with it (this includes being able to create new inodes). ->mk_present * indicates whether the key is in this state. ->mk_secret exists, the key * is in the keyring, and ->mk_active_refs > 0 due to ->mk_present. * * FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED * Removal of this key has been initiated, but some inodes that were * unlocked with it are still in-use. Like ABSENT, ->mk_secret is wiped, * and the key can no longer be used to unlock inodes. Unlike ABSENT, the * key is still in the keyring; ->mk_decrypted_inodes is nonempty; and * ->mk_active_refs > 0, being equal to the size of ->mk_decrypted_inodes. * * This state transitions to ABSENT if ->mk_decrypted_inodes becomes empty, * or to PRESENT if FS_IOC_ADD_ENCRYPTION_KEY is called again for this key. * * FSCRYPT_KEY_STATUS_ABSENT * Key is fully removed. The key is no longer in the keyring, * ->mk_decrypted_inodes is empty, ->mk_active_refs == 0, ->mk_secret is * wiped, and the key can no longer be used to unlock inodes. */ struct fscrypt_master_key { /* * Link in ->s_master_keys->key_hashtable. * Only valid if ->mk_active_refs > 0. */ struct hlist_node mk_node; /* Semaphore that protects ->mk_secret, ->mk_users, and ->mk_present */ struct rw_semaphore mk_sem; /* * Active and structural reference counts. An active ref guarantees * that the struct continues to exist, continues to be in the keyring * ->s_master_keys, and that any embedded subkeys (e.g. * ->mk_direct_keys) that have been prepared continue to exist. * A structural ref only guarantees that the struct continues to exist. * * There is one active ref associated with ->mk_present being true, and * one active ref for each inode in ->mk_decrypted_inodes. * * There is one structural ref associated with the active refcount being * nonzero. Finding a key in the keyring also takes a structural ref, * which is then held temporarily while the key is operated on. */ refcount_t mk_active_refs; refcount_t mk_struct_refs; struct rcu_head mk_rcu_head; /* * The secret key material. Wiped as soon as it is no longer needed; * for details, see the fscrypt_master_key struct comment. * * Locking: protected by ->mk_sem. */ struct fscrypt_master_key_secret mk_secret; /* * For v1 policy keys: an arbitrary key descriptor which was assigned by * userspace (->descriptor). * * For v2 policy keys: a cryptographic hash of this key (->identifier). */ struct fscrypt_key_specifier mk_spec; /* * Keyring which contains a key of type 'key_type_fscrypt_user' for each * user who has added this key. Normally each key will be added by just * one user, but it's possible that multiple users share a key, and in * that case we need to keep track of those users so that one user can't * remove the key before the others want it removed too. * * This is NULL for v1 policy keys; those can only be added by root. * * Locking: protected by ->mk_sem. (We don't just rely on the keyrings * subsystem semaphore ->mk_users->sem, as we need support for atomic * search+insert along with proper synchronization with other fields.) */ struct key *mk_users; /* * List of inodes that were unlocked using this key. This allows the * inodes to be evicted efficiently if the key is removed. */ struct list_head mk_decrypted_inodes; spinlock_t mk_decrypted_inodes_lock; /* * Per-mode encryption keys for the various types of encryption policies * that use them. 
Allocated and derived on-demand. */ struct fscrypt_prepared_key mk_direct_keys[FSCRYPT_MODE_MAX + 1]; struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[FSCRYPT_MODE_MAX + 1]; struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[FSCRYPT_MODE_MAX + 1]; /* Hash key for inode numbers. Initialized only when needed. */ siphash_key_t mk_ino_hash_key; bool mk_ino_hash_key_initialized; /* * Whether this key is in the "present" state, i.e. fully usable. For * details, see the fscrypt_master_key struct comment. * * Locking: protected by ->mk_sem, but can be read locklessly using * READ_ONCE(). Writers must use WRITE_ONCE() when concurrent readers * are possible. */ bool mk_present; } __randomize_layout; static inline const char *master_key_spec_type( const struct fscrypt_key_specifier *spec) { switch (spec->type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: return "descriptor"; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: return "identifier"; } return "[unknown]"; } static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) { switch (spec->type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: return FSCRYPT_KEY_DESCRIPTOR_SIZE; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: return FSCRYPT_KEY_IDENTIFIER_SIZE; } return 0; } void fscrypt_put_master_key(struct fscrypt_master_key *mk); void fscrypt_put_master_key_activeref(struct super_block *sb, struct fscrypt_master_key *mk); struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec); int fscrypt_get_test_dummy_key_identifier( u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]); int fscrypt_add_test_dummy_key(struct super_block *sb, struct fscrypt_key_specifier *key_spec); int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]); int __init fscrypt_init_keyring(void); /* keysetup.c */ struct fscrypt_mode { const char *friendly_name; const char *cipher_str; int keysize; /* key size in bytes */ int security_strength; /* security strength in bytes */ int ivsize; /* IV size in bytes */ int logged_cryptoapi_impl; int logged_blk_crypto_native; int logged_blk_crypto_fallback; enum blk_crypto_mode_num blk_crypto_mode; }; extern struct fscrypt_mode fscrypt_modes[]; int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci); void fscrypt_destroy_prepared_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); int fscrypt_set_per_file_enc_key(struct fscrypt_inode_info *ci, const u8 *raw_key); int fscrypt_derive_dirhash_key(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk); void fscrypt_hash_inode_number(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk); int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported); /** * fscrypt_require_key() - require an inode's encryption key * @inode: the inode we need the key for * * If the inode is encrypted, set up its encryption key if not already done. * Then require that the key be present and return -ENOKEY otherwise. * * No locks are needed, and the key will live as long as the struct inode --- so * it won't go away from under you. * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. 
*/ static inline int fscrypt_require_key(struct inode *inode) { if (IS_ENCRYPTED(inode)) { int err = fscrypt_get_encryption_info(inode, false); if (err) return err; if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } return 0; } /* keysetup_v1.c */ void fscrypt_put_direct_key(struct fscrypt_direct_key *dk); int fscrypt_setup_v1_file_key(struct fscrypt_inode_info *ci, const u8 *raw_master_key); int fscrypt_setup_v1_file_key_via_subscribed_keyrings( struct fscrypt_inode_info *ci); /* policy.c */ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, const union fscrypt_policy *policy2); int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy, struct fscrypt_key_specifier *key_spec); const union fscrypt_policy *fscrypt_get_dummy_policy(struct super_block *sb); bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode); int fscrypt_policy_from_context(union fscrypt_policy *policy_u, const union fscrypt_context *ctx_u, int ctx_size); const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir); #endif /* _FSCRYPT_PRIVATE_H */
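The BUILD_BUG_ON() checks in fscrypt_context_size() above rely on both context layouts packing with no padding, since every member is a single byte or a byte array. Below is a minimal standalone sketch of the same arithmetic, using hypothetical struct names and assuming the UAPI sizes of 8 bytes for the key descriptor and 16 bytes for the key identifier and the file nonce:

/* Standalone userspace sketch (not kernel code): mirrors the two on-disk
 * context layouts and checks the 28/40-byte sizes that
 * fscrypt_context_size() asserts. Names and constants here are
 * illustrative assumptions, not the kernel definitions. */
#include <stdint.h>
#include <stdio.h>

#define KEY_DESCRIPTOR_SIZE 8   /* assumed, per fscrypt UAPI */
#define KEY_IDENTIFIER_SIZE 16  /* assumed, per fscrypt UAPI */
#define FILE_NONCE_SIZE     16  /* assumed, per fscrypt UAPI */

struct ctx_v1 {
	uint8_t version;
	uint8_t contents_encryption_mode;
	uint8_t filenames_encryption_mode;
	uint8_t flags;
	uint8_t master_key_descriptor[KEY_DESCRIPTOR_SIZE];
	uint8_t nonce[FILE_NONCE_SIZE];
};

struct ctx_v2 {
	uint8_t version;
	uint8_t contents_encryption_mode;
	uint8_t filenames_encryption_mode;
	uint8_t flags;
	uint8_t log2_data_unit_size;
	uint8_t __reserved[3];
	uint8_t master_key_identifier[KEY_IDENTIFIER_SIZE];
	uint8_t nonce[FILE_NONCE_SIZE];
};

/* Every member is one byte wide, so the compiler inserts no padding. */
_Static_assert(sizeof(struct ctx_v1) == 28, "v1 context is 28 bytes");
_Static_assert(sizeof(struct ctx_v2) == 40, "v2 context is 40 bytes");

int main(void)
{
	printf("v1 context: %zu bytes, v2 context: %zu bytes\n",
	       sizeof(struct ctx_v1), sizeof(struct ctx_v2));
	return 0;
}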
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_UTSNAME_H #define _LINUX_UTSNAME_H #include <linux/sched.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/err.h> #include <uapi/linux/utsname.h> enum uts_proc { UTS_PROC_ARCH, UTS_PROC_OSTYPE, UTS_PROC_OSRELEASE, UTS_PROC_VERSION, UTS_PROC_HOSTNAME, UTS_PROC_DOMAINNAME, }; struct user_namespace; extern struct user_namespace init_user_ns; struct uts_namespace { struct new_utsname name; struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; } __randomize_layout; extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS static inline void get_uts_ns(struct uts_namespace *ns) { refcount_inc(&ns->ns.count); } extern struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { if (refcount_dec_and_test(&ns->ns.count)) free_uts_ns(ns); } void uts_ns_init(void); #else static inline void get_uts_ns(struct uts_namespace *ns) { } static inline void put_uts_ns(struct uts_namespace *ns) { } static inline struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns) { if (flags & CLONE_NEWUTS) return ERR_PTR(-EINVAL); return old_ns; } static inline void uts_ns_init(void) { } #endif #ifdef CONFIG_PROC_SYSCTL extern void uts_proc_notify(enum uts_proc proc); #else static inline void uts_proc_notify(enum uts_proc proc) { } #endif static inline struct new_utsname *utsname(void) { return &current->nsproxy->uts_ns->name; } static inline struct new_utsname *init_utsname(void) { return &init_uts_ns.name; } extern struct rw_semaphore uts_sem; #endif /* _LINUX_UTSNAME_H */
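For reference, the new_utsname fields held per uts_namespace above are what uname(2) reports to userspace. The following illustrative program prints the POSIX subset of those fields; the nodename it shows will differ per UTS namespace when run under something like unshare --uts:

/* Userspace sketch: uname(2) returns the per-namespace new_utsname data
 * that utsname() exposes inside the kernel. This is only a demonstration
 * of the data the header above virtualizes. */
#include <stdio.h>
#include <sys/utsname.h>

int main(void)
{
	struct utsname u;

	if (uname(&u) != 0) {
		perror("uname");
		return 1;
	}
	printf("sysname:  %s\n", u.sysname);
	printf("nodename: %s\n", u.nodename);
	printf("release:  %s\n", u.release);
	printf("version:  %s\n", u.version);
	printf("machine:  %s\n", u.machine);
	return 0;
}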
// SPDX-License-Identifier: GPL-2.0-or-later /* * Generic address resolution entity * * Authors: * net_random Alan Cox * net_ratelimit Andi Kleen * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> #include <linux/string.h> #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/ratelimit.h> #include <linux/socket.h> #include <net/sock.h> #include <net/net_ratelimit.h> #include <net/ipv6.h> #include <asm/byteorder.h> #include <linux/uaccess.h> DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); /* * Convert an ASCII string to binary IP. * This is outside of net/ipv4/ because various code that uses IP addresses * is otherwise not dependent on the TCP/IP stack. */ __be32 in_aton(const char *str) { unsigned int l; unsigned int val; int i; l = 0; for (i = 0; i < 4; i++) { l <<= 8; if (*str != '\0') { val = 0; while (*str != '\0' && *str != '.'
&& *str != '\n') { val *= 10; val += *str - '0'; str++; } l |= val; if (*str != '\0') str++; } } return htonl(l); } EXPORT_SYMBOL(in_aton); #define IN6PTON_XDIGIT 0x00010000 #define IN6PTON_DIGIT 0x00020000 #define IN6PTON_COLON_MASK 0x00700000 #define IN6PTON_COLON_1 0x00100000 /* single : requested */ #define IN6PTON_COLON_2 0x00200000 /* second : requested */ #define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ #define IN6PTON_DOT 0x00800000 /* . */ #define IN6PTON_DELIM 0x10000000 #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 static inline int xdigit2bin(char c, int delim) { int val; if (c == delim || c == '\0') return IN6PTON_DELIM; if (c == ':') return IN6PTON_COLON_MASK; if (c == '.') return IN6PTON_DOT; val = hex_to_bin(c); if (val >= 0) return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0); if (delim == -1) return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } /** * in4_pton - convert an IPv4 address from literal to binary representation * @src: the start of the IPv4 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[4] array) representation of the IPv4 address * @delim: the delimiter of the IPv4 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s; u8 *d; u8 dbuf[4]; int ret = 0; int i; int w = 0; if (srclen < 0) srclen = strlen(src); s = src; d = dbuf; i = 0; while (1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { goto out; } if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (w == 0) goto out; *d++ = w & 0xff; w = 0; i++; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (i != 4) goto out; break; } goto cont; } w = (w * 10) + c; if ((w & 0xffff) > 255) { goto out; } cont: if (i >= 4) goto out; s++; srclen--; } ret = 1; memcpy(dst, dbuf, sizeof(dbuf)); out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in4_pton); /** * in6_pton - convert an IPv6 address from literal to binary representation * @src: the start of the IPv6 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[16] array) representation of the IPv6 address * @delim: the delimiter of the IPv6 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; u8 dbuf[16]; int ret = 0; int i; int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; int w = 0; memset(dbuf, 0, sizeof(dbuf)); s = src; d = dbuf; if (srclen < 0) srclen = strlen(src); while (1) { int c; c = xdigit2bin(srclen > 0 ? 
*s : '\0', delim); if (!(c & state)) goto out; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { /* process one 16-bit word */ if (!(state & IN6PTON_NULL)) { *d++ = (w >> 8) & 0xff; *d++ = w & 0xff; } w = 0; if (c & IN6PTON_DELIM) { /* We've processed last word */ break; } /* * COLON_1 => XDIGIT * COLON_2 => XDIGIT|DELIM * COLON_1_2 => COLON_2 */ switch (state & IN6PTON_COLON_MASK) { case IN6PTON_COLON_2: dc = d; state = IN6PTON_XDIGIT | IN6PTON_DELIM; if (dc - dbuf >= sizeof(dbuf)) state |= IN6PTON_NULL; break; case IN6PTON_COLON_1|IN6PTON_COLON_1_2: state = IN6PTON_XDIGIT | IN6PTON_COLON_2; break; case IN6PTON_COLON_1: state = IN6PTON_XDIGIT; break; case IN6PTON_COLON_1_2: state = IN6PTON_COLON_2; break; default: state = 0; } tok = s + 1; goto cont; } if (c & IN6PTON_DOT) { ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); if (ret > 0) { d += 4; break; } goto out; } w = (w << 4) | (0xff & c); state = IN6PTON_COLON_1 | IN6PTON_DELIM; if (!(w & 0xf000)) { state |= IN6PTON_XDIGIT; } if (!dc && d + 2 < dbuf + sizeof(dbuf)) { state |= IN6PTON_COLON_1_2; state &= ~IN6PTON_DELIM; } if (d + 2 >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); } cont: if ((dc && d + 4 < dbuf + sizeof(dbuf)) || d + 4 == dbuf + sizeof(dbuf)) { state |= IN6PTON_DOT; } if (d >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); } s++; srclen--; } i = 15; d--; if (dc) { while (d >= dc) dst[i--] = *d--; while (i >= dc - dbuf) dst[i--] = 0; while (i >= 0) dst[i--] = *d--; } else memcpy(dst, dbuf, sizeof(dbuf)); ret = 1; out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in6_pton); static int inet4_pton(const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; size_t srclen = strlen(src); if (srclen > INET_ADDRSTRLEN) return -EINVAL; if (in4_pton(src, srclen, (u8 *)&addr4->sin_addr.s_addr, '\n', NULL) == 0) return -EINVAL; addr4->sin_family = AF_INET; addr4->sin_port = htons(port_num); return 0; } static int inet6_pton(struct net *net, const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; const char *scope_delim; size_t srclen = strlen(src); if (srclen > INET6_ADDRSTRLEN) return -EINVAL; if (in6_pton(src, srclen, (u8 *)&addr6->sin6_addr.s6_addr, '%', &scope_delim) == 0) return -EINVAL; if (ipv6_addr_type(&addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL && src + srclen != scope_delim && *scope_delim == '%') { struct net_device *dev; char scope_id[16]; size_t scope_len = min_t(size_t, sizeof(scope_id) - 1, src + srclen - scope_delim - 1); memcpy(scope_id, scope_delim + 1, scope_len); scope_id[scope_len] = '\0'; dev = dev_get_by_name(net, scope_id); if (dev) { addr6->sin6_scope_id = dev->ifindex; dev_put(dev); } else if (kstrtouint(scope_id, 0, &addr6->sin6_scope_id)) { return -EINVAL; } } addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(port_num); return 0; } /** * inet_pton_with_scope - convert an IPv4/IPv6 and port to socket address * @net: net namespace (used for scope handling) * @af: address family, AF_INET, AF_INET6 or AF_UNSPEC for either * @src: the start of the address string * @port: the start of the port string (or NULL for none) * @addr: output socket address * * Return zero on success, return errno when any error occurs. 
*/ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af, const char *src, const char *port, struct sockaddr_storage *addr) { u16 port_num; int ret = -EINVAL; if (port) { if (kstrtou16(port, 0, &port_num)) return -EINVAL; } else { port_num = 0; } switch (af) { case AF_INET: ret = inet4_pton(src, port_num, addr); break; case AF_INET6: ret = inet6_pton(net, src, port_num, addr); break; case AF_UNSPEC: ret = inet4_pton(src, port_num, addr); if (ret) ret = inet6_pton(net, src, port_num, addr); break; default: pr_err("unexpected address family %d\n", af); } return ret; } EXPORT_SYMBOL(inet_pton_with_scope); bool inet_addr_is_any(struct sockaddr *addr) { if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr; const struct sockaddr_in6 in6_any = { .sin6_addr = IN6ADDR_ANY_INIT }; if (!memcmp(in6->sin6_addr.s6_addr, in6_any.sin6_addr.s6_addr, 16)) return true; } else if (addr->sa_family == AF_INET) { struct sockaddr_in *in = (struct sockaddr_in *)addr; if (in->sin_addr.s_addr == htonl(INADDR_ANY)) return true; } else { pr_warn("unexpected address family %u\n", addr->sa_family); } return false; } EXPORT_SYMBOL(inet_addr_is_any); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_add(csum_sub(~(skb->csum), (__force __wsum)from), (__force __wsum)to); } else if (pseudohdr) *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), (__force __wsum)from), (__force __wsum)to)); } EXPORT_SYMBOL(inet_proto_csum_replace4); /** * inet_proto_csum_replace16 - update layer 4 header checksum field * @sum: Layer 4 header checksum field * @skb: sk_buff for the packet * @from: old IPv6 address * @to: new IPv6 address * @pseudohdr: True if layer 4 header checksum includes pseudoheader * * Update layer 4 header as per the update in IPv6 src/dst address. * * There is no need to update skb->csum in this function, because update in two * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as * L4 Header checksum for skb->csum calculation. */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], to[0], to[1], to[2], to[3], }; if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
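The AF_UNSPEC branch of inet_pton_with_scope() simply tries IPv4 first and falls back to IPv6. Below is a hedged userspace sketch of that fallback using the standard inet_pton(3); toy_pton_any() is a hypothetical helper and scope-id handling is omitted:

/* Userspace sketch of the AF_UNSPEC fallback used above: attempt to parse
 * the string as IPv4, then as IPv6, filling a sockaddr_storage either way.
 * Kernel-only details (net namespaces, scope ids) are left out. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static int toy_pton_any(const char *src, unsigned short port,
			struct sockaddr_storage *addr)
{
	struct sockaddr_in *a4 = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)addr;

	memset(addr, 0, sizeof(*addr));
	if (inet_pton(AF_INET, src, &a4->sin_addr) == 1) {
		a4->sin_family = AF_INET;
		a4->sin_port = htons(port);
		return 0;
	}
	if (inet_pton(AF_INET6, src, &a6->sin6_addr) == 1) {
		a6->sin6_family = AF_INET6;
		a6->sin6_port = htons(port);
		return 0;
	}
	return -1;
}

int main(void)
{
	struct sockaddr_storage ss;

	printf("v4:  %d\n", toy_pton_any("192.168.1.10", 4420, &ss));
	printf("v6:  %d\n", toy_pton_any("fd00::1", 4420, &ss));
	printf("bad: %d\n", toy_pton_any("not-an-address", 0, &ss));
	return 0;
}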
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/mm.h> #include <linux/llist.h> #include <linux/bpf.h> #include <linux/irq_work.h> #include <linux/bpf_mem_alloc.h> #include <linux/memcontrol.h> #include <asm/local.h> /* Any context (including NMI) BPF specific memory allocator. * * Tracing BPF programs can attach to kprobe and fentry. Hence they * run in unknown context where calling plain kmalloc() might not be safe. * * Front-end kmalloc() with per-cpu per-bucket cache of free elements. * Refill this cache asynchronously from irq_work. * * CPU_0 buckets * 16 32 64 96 128 196 256 512 1024 2048 4096 * ... * CPU_N buckets * 16 32 64 96 128 196 256 512 1024 2048 4096 * * The buckets are prefilled at the start. * BPF programs always run with migration disabled. * It's safe to allocate from cache of the current cpu with irqs disabled. * Free-ing is always done into bucket of the current cpu as well. * irq_work trims extra free elements from buckets with kfree * and refills them with kmalloc, so global kmalloc logic takes care * of freeing objects allocated by one cpu and freed on another. * * Every allocated object is padded with extra 8 bytes that contains * struct llist_node. */ #define LLIST_NODE_SZ sizeof(struct llist_node) #define BPF_MEM_ALLOC_SIZE_MAX 4096 /* similar to kmalloc, but sizeof == 8 bucket is gone */ static u8 size_index[24] __ro_after_init = { 3, /* 8 */ 3, /* 16 */ 4, /* 24 */ 4, /* 32 */ 5, /* 40 */ 5, /* 48 */ 5, /* 56 */ 5, /* 64 */ 1, /* 72 */ 1, /* 80 */ 1, /* 88 */ 1, /* 96 */ 6, /* 104 */ 6, /* 112 */ 6, /* 120 */ 6, /* 128 */ 2, /* 136 */ 2, /* 144 */ 2, /* 152 */ 2, /* 160 */ 2, /* 168 */ 2, /* 176 */ 2, /* 184 */ 2 /* 192 */ }; static int bpf_mem_cache_idx(size_t size) { if (!size || size > BPF_MEM_ALLOC_SIZE_MAX) return -1; if (size <= 192) return size_index[(size - 1) / 8] - 1; return fls(size - 1) - 2; } #define NUM_CACHES 11 struct bpf_mem_cache { /* per-cpu list of free objects of size 'unit_size'. * All accesses are done with interrupts disabled and 'active' counter * protection with __llist_add() and __llist_del_first(). */ struct llist_head free_llist; local_t active; /* Operations on the free_list from unit_alloc/unit_free/bpf_mem_refill * are sequenced by per-cpu 'active' counter. But unit_free() cannot * fail. When 'active' is busy the unit_free() will add an object to * free_llist_extra.
*/ struct llist_head free_llist_extra; struct irq_work refill_work; struct obj_cgroup *objcg; int unit_size; /* count of objects in free_llist */ int free_cnt; int low_watermark, high_watermark, batch; int percpu_size; bool draining; struct bpf_mem_cache *tgt; /* list of objects to be freed after RCU GP */ struct llist_head free_by_rcu; struct llist_node *free_by_rcu_tail; struct llist_head waiting_for_gp; struct llist_node *waiting_for_gp_tail; struct rcu_head rcu; atomic_t call_rcu_in_progress; struct llist_head free_llist_extra_rcu; /* list of objects to be freed after RCU tasks trace GP */ struct llist_head free_by_rcu_ttrace; struct llist_head waiting_for_gp_ttrace; struct rcu_head rcu_ttrace; atomic_t call_rcu_ttrace_in_progress; }; struct bpf_mem_caches { struct bpf_mem_cache cache[NUM_CACHES]; }; static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; static struct llist_node notrace *__llist_del_first(struct llist_head *head) { struct llist_node *entry, *next; entry = head->first; if (!entry) return NULL; next = entry->next; head->first = next; return entry; } static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags) { if (c->percpu_size) { void __percpu **obj = kmalloc_node(c->percpu_size, flags, node); void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags); if (!obj || !pptr) { free_percpu(pptr); kfree(obj); return NULL; } obj[1] = pptr; return obj; } return kmalloc_node(c->unit_size, flags | __GFP_ZERO, node); } static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c) { #ifdef CONFIG_MEMCG if (c->objcg) return get_mem_cgroup_from_objcg(c->objcg); return root_mem_cgroup; #else return NULL; #endif } static void inc_active(struct bpf_mem_cache *c, unsigned long *flags) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) /* In RT irq_work runs in per-cpu kthread, so disable * interrupts to avoid preemption and interrupts and * reduce the chance of bpf prog executing on this cpu * when active counter is busy. */ local_irq_save(*flags); /* alloc_bulk runs from irq_work which will not preempt a bpf * program that does unit_alloc/unit_free since IRQs are * disabled there. There is no race to increment 'active' * counter. It protects free_llist from corruption in case NMI * bpf prog preempted this loop. */ WARN_ON_ONCE(local_inc_return(&c->active) != 1); } static void dec_active(struct bpf_mem_cache *c, unsigned long *flags) { local_dec(&c->active); if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_restore(*flags); } static void add_obj_to_free_list(struct bpf_mem_cache *c, void *obj) { unsigned long flags; inc_active(c, &flags); __llist_add(obj, &c->free_llist); c->free_cnt++; dec_active(c, &flags); } /* Mostly runs from irq_work except __init phase. */ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic) { struct mem_cgroup *memcg = NULL, *old_memcg; gfp_t gfp; void *obj; int i; gfp = __GFP_NOWARN | __GFP_ACCOUNT; gfp |= atomic ? GFP_NOWAIT : GFP_KERNEL; for (i = 0; i < cnt; i++) { /* * For every 'c' llist_del_first(&c->free_by_rcu_ttrace); is * done only by one CPU == current CPU. Other CPUs might * llist_add() and llist_del_all() in parallel. 
*/ obj = llist_del_first(&c->free_by_rcu_ttrace); if (!obj) break; add_obj_to_free_list(c, obj); } if (i >= cnt) return; for (; i < cnt; i++) { obj = llist_del_first(&c->waiting_for_gp_ttrace); if (!obj) break; add_obj_to_free_list(c, obj); } if (i >= cnt) return; memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); for (; i < cnt; i++) { /* Allocate, but don't deplete atomic reserves that typical * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc * will allocate from the current numa node which is what we * want here. */ obj = __alloc(c, node, gfp); if (!obj) break; add_obj_to_free_list(c, obj); } set_active_memcg(old_memcg); mem_cgroup_put(memcg); } static void free_one(void *obj, bool percpu) { if (percpu) free_percpu(((void __percpu **)obj)[1]); kfree(obj); } static int free_all(struct llist_node *llnode, bool percpu) { struct llist_node *pos, *t; int cnt = 0; llist_for_each_safe(pos, t, llnode) { free_one(pos, percpu); cnt++; } return cnt; } static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); atomic_set(&c->call_rcu_ttrace_in_progress, 0); } static void __free_rcu_tasks_trace(struct rcu_head *head) { /* If RCU Tasks Trace grace period implies RCU grace period, * there is no need to invoke call_rcu(). */ if (rcu_trace_implies_rcu_gp()) __free_rcu(head); else call_rcu(head, __free_rcu); } static void enque_to_free(struct bpf_mem_cache *c, void *obj) { struct llist_node *llnode = obj; /* bpf_mem_cache is a per-cpu object. Freeing happens in irq_work. * Nothing races to add to free_by_rcu_ttrace list. */ llist_add(llnode, &c->free_by_rcu_ttrace); } static void do_call_rcu_ttrace(struct bpf_mem_cache *c) { struct llist_node *llnode, *t; if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) { if (unlikely(READ_ONCE(c->draining))) { llnode = llist_del_all(&c->free_by_rcu_ttrace); free_all(llnode, !!c->percpu_size); } return; } WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace)); llist_for_each_safe(llnode, t, llist_del_all(&c->free_by_rcu_ttrace)) llist_add(llnode, &c->waiting_for_gp_ttrace); if (unlikely(READ_ONCE(c->draining))) { __free_rcu(&c->rcu_ttrace); return; } /* Use call_rcu_tasks_trace() to wait for sleepable progs to finish. * If RCU Tasks Trace grace period implies RCU grace period, free * these elements directly, else use call_rcu() to wait for normal * progs to finish and finally do free_one() on each element. 
*/ call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace); } static void free_bulk(struct bpf_mem_cache *c) { struct bpf_mem_cache *tgt = c->tgt; struct llist_node *llnode, *t; unsigned long flags; int cnt; WARN_ON_ONCE(tgt->unit_size != c->unit_size); WARN_ON_ONCE(tgt->percpu_size != c->percpu_size); do { inc_active(c, &flags); llnode = __llist_del_first(&c->free_llist); if (llnode) cnt = --c->free_cnt; else cnt = 0; dec_active(c, &flags); if (llnode) enque_to_free(tgt, llnode); } while (cnt > (c->high_watermark + c->low_watermark) / 2); /* and drain free_llist_extra */ llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra)) enque_to_free(tgt, llnode); do_call_rcu_ttrace(tgt); } static void __free_by_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); struct bpf_mem_cache *tgt = c->tgt; struct llist_node *llnode; WARN_ON_ONCE(tgt->unit_size != c->unit_size); WARN_ON_ONCE(tgt->percpu_size != c->percpu_size); llnode = llist_del_all(&c->waiting_for_gp); if (!llnode) goto out; llist_add_batch(llnode, c->waiting_for_gp_tail, &tgt->free_by_rcu_ttrace); /* Objects went through regular RCU GP. Send them to RCU tasks trace */ do_call_rcu_ttrace(tgt); out: atomic_set(&c->call_rcu_in_progress, 0); } static void check_free_by_rcu(struct bpf_mem_cache *c) { struct llist_node *llnode, *t; unsigned long flags; /* drain free_llist_extra_rcu */ if (unlikely(!llist_empty(&c->free_llist_extra_rcu))) { inc_active(c, &flags); llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra_rcu)) if (__llist_add(llnode, &c->free_by_rcu)) c->free_by_rcu_tail = llnode; dec_active(c, &flags); } if (llist_empty(&c->free_by_rcu)) return; if (atomic_xchg(&c->call_rcu_in_progress, 1)) { /* * Instead of kmalloc-ing new rcu_head and triggering 10k * call_rcu() to hit rcutree.qhimark and force RCU to notice * the overload just ask RCU to hurry up. There could be many * objects in free_by_rcu list. * This hint reduces memory consumption for an artificial * benchmark from 2 Gbyte to 150 Mbyte. */ rcu_request_urgent_qs_task(current); return; } WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp)); inc_active(c, &flags); WRITE_ONCE(c->waiting_for_gp.first, __llist_del_all(&c->free_by_rcu)); c->waiting_for_gp_tail = c->free_by_rcu_tail; dec_active(c, &flags); if (unlikely(READ_ONCE(c->draining))) { free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } else { call_rcu_hurry(&c->rcu, __free_by_rcu); } } static void bpf_mem_refill(struct irq_work *work) { struct bpf_mem_cache *c = container_of(work, struct bpf_mem_cache, refill_work); int cnt; /* Racy access to free_cnt. It doesn't need to be 100% accurate */ cnt = c->free_cnt; if (cnt < c->low_watermark) /* irq_work runs on this cpu and kmalloc will allocate * from the current numa node which is what we want here. */ alloc_bulk(c, c->batch, NUMA_NO_NODE, true); else if (cnt > c->high_watermark) free_bulk(c); check_free_by_rcu(c); } static void notrace irq_work_raise(struct bpf_mem_cache *c) { irq_work_queue(&c->refill_work); } /* For typical bpf map case that uses bpf_mem_cache_alloc and single bucket * the freelist cache will be elem_size * 64 (or less) on each cpu. 
* * For bpf programs that don't have statically known allocation sizes and * assuming (low_mark + high_mark) / 2 as an average number of elements per * bucket and all buckets are used the total amount of memory in freelists * on each cpu will be: * 64*16 + 64*32 + 64*64 + 64*96 + 64*128 + 64*196 + 64*256 + 32*512 + 16*1024 + 8*2048 + 4*4096 * == ~ 116 Kbyte using below heuristic. * Initialized, but unused bpf allocator (not bpf map specific one) will * consume ~ 11 Kbyte per cpu. * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. * * Percpu allocation is typically rare. To avoid potential unnecessary large * memory consumption, set low_mark = 1 and high_mark = 3, resulting in c->batch = 1. */ static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); if (c->percpu_size) { c->low_watermark = 1; c->high_watermark = 3; } else if (c->unit_size <= 256) { c->low_watermark = 32; c->high_watermark = 96; } else { /* When page_size == 4k, order-0 cache will have low_mark == 2 * and high_mark == 6 with batch alloc of 3 individual pages at * a time. * 8k allocs and above low == 1, high == 3, batch == 1. */ c->low_watermark = max(32 * 256 / c->unit_size, 1); c->high_watermark = max(96 * 256 / c->unit_size, 3); } c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1); } static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) { int cnt = 1; /* To avoid consuming memory, for non-percpu allocation, assume that * 1st run of bpf prog won't be doing more than 4 map_update_elem from * irq disabled region if unit size is less than or equal to 256. * For all other cases, let us just do one allocation. */ if (!c->percpu_size && c->unit_size <= 256) cnt = 4; alloc_bulk(c, cnt, cpu_to_node(cpu), false); } /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * * When size == 0 allocate 11 bpf_mem_cache-s for each cpu, then rely on * kmalloc/kfree. Max allocation size is 4096 in this case. * This is bpf_dynptr and bpf_kptr use case. 
*/ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc; struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc; struct obj_cgroup *objcg = NULL; int cpu, i, unit_size, percpu_size = 0; if (percpu && size == 0) return -EINVAL; /* room for llist_node and per-cpu pointer */ if (percpu) percpu_size = LLIST_NODE_SZ + sizeof(void *); ma->percpu = percpu; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); if (!pc) return -ENOMEM; if (!percpu) size += LLIST_NODE_SZ; /* room for llist_node */ unit_size = size; #ifdef CONFIG_MEMCG if (memcg_bpf_enabled()) objcg = get_obj_cgroup_from_current(); #endif ma->objcg = objcg; for_each_possible_cpu(cpu) { c = per_cpu_ptr(pc, cpu); c->unit_size = unit_size; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } ma->cache = pc; return 0; } pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; #ifdef CONFIG_MEMCG objcg = get_obj_cgroup_from_current(); #endif ma->objcg = objcg; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; c->unit_size = sizes[i]; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } } ma->caches = pcc; return 0; } int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg) { struct bpf_mem_caches __percpu *pcc; pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; ma->caches = pcc; ma->objcg = objcg; ma->percpu = true; return 0; } int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size) { struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc; int cpu, i, unit_size, percpu_size; struct obj_cgroup *objcg; struct bpf_mem_cache *c; i = bpf_mem_cache_idx(size); if (i < 0) return -EINVAL; /* room for llist_node and per-cpu pointer */ percpu_size = LLIST_NODE_SZ + sizeof(void *); unit_size = sizes[i]; objcg = ma->objcg; pcc = ma->caches; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); c = &cc->cache[i]; if (c->unit_size) break; c->unit_size = unit_size; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } return 0; } static void drain_mem_cache(struct bpf_mem_cache *c) { bool percpu = !!c->percpu_size; /* No progs are using this bpf_mem_cache, but htab_map_free() called * bpf_mem_cache_free() for all remaining elements and they can be in * free_by_rcu_ttrace or in waiting_for_gp_ttrace lists, so drain those lists now. * * Except for waiting_for_gp_ttrace list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). 
*/ free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu); free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu); free_all(__llist_del_all(&c->free_llist), percpu); free_all(__llist_del_all(&c->free_llist_extra), percpu); free_all(__llist_del_all(&c->free_by_rcu), percpu); free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu); free_all(llist_del_all(&c->waiting_for_gp), percpu); } static void check_mem_cache(struct bpf_mem_cache *c) { WARN_ON_ONCE(!llist_empty(&c->free_by_rcu_ttrace)); WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace)); WARN_ON_ONCE(!llist_empty(&c->free_llist)); WARN_ON_ONCE(!llist_empty(&c->free_llist_extra)); WARN_ON_ONCE(!llist_empty(&c->free_by_rcu)); WARN_ON_ONCE(!llist_empty(&c->free_llist_extra_rcu)); WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp)); } static void check_leaked_objs(struct bpf_mem_alloc *ma) { struct bpf_mem_caches *cc; struct bpf_mem_cache *c; int cpu, i; if (ma->cache) { for_each_possible_cpu(cpu) { c = per_cpu_ptr(ma->cache, cpu); check_mem_cache(c); } } if (ma->caches) { for_each_possible_cpu(cpu) { cc = per_cpu_ptr(ma->caches, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; check_mem_cache(c); } } } } static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) { check_leaked_objs(ma); free_percpu(ma->cache); free_percpu(ma->caches); ma->cache = NULL; ma->caches = NULL; } static void free_mem_alloc(struct bpf_mem_alloc *ma) { /* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks * might still execute. Wait for them. * * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(), * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(), * so if call_rcu(head, __free_rcu) is skipped due to * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by * using rcu_trace_implies_rcu_gp() as well. */ rcu_barrier(); /* wait for __free_by_rcu */ rcu_barrier_tasks_trace(); /* wait for __free_rcu */ if (!rcu_trace_implies_rcu_gp()) rcu_barrier(); free_mem_alloc_no_barrier(ma); } static void free_mem_alloc_deferred(struct work_struct *work) { struct bpf_mem_alloc *ma = container_of(work, struct bpf_mem_alloc, work); free_mem_alloc(ma); kfree(ma); } static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress) { struct bpf_mem_alloc *copy; if (!rcu_in_progress) { /* Fast path. No callbacks are pending, hence no need to do * rcu_barrier-s. 
*/ free_mem_alloc_no_barrier(ma); return; } copy = kmemdup(ma, sizeof(*ma), GFP_KERNEL); if (!copy) { /* Slow path with inline barrier-s */ free_mem_alloc(ma); return; } /* Defer barriers into worker to let the rest of map memory to be freed */ memset(ma, 0, sizeof(*ma)); INIT_WORK(&copy->work, free_mem_alloc_deferred); queue_work(system_unbound_wq, &copy->work); } void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) { struct bpf_mem_caches *cc; struct bpf_mem_cache *c; int cpu, i, rcu_in_progress; if (ma->cache) { rcu_in_progress = 0; for_each_possible_cpu(cpu) { c = per_cpu_ptr(ma->cache, cpu); WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } if (ma->caches) { rcu_in_progress = 0; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(ma->caches, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } } obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } } /* notrace is necessary here and in other functions to make sure * bpf programs cannot attach to them and cause llist corruptions. */ static void notrace *unit_alloc(struct bpf_mem_cache *c) { struct llist_node *llnode = NULL; unsigned long flags; int cnt = 0; /* Disable irqs to prevent the following race for majority of prog types: * prog_A * bpf_mem_alloc * preemption or irq -> prog_B * bpf_mem_alloc * * but prog_B could be a perf_event NMI prog. * Use per-cpu 'active' counter to order free_list access between * unit_alloc/unit_free/bpf_mem_refill. */ local_irq_save(flags); if (local_inc_return(&c->active) == 1) { llnode = __llist_del_first(&c->free_llist); if (llnode) { cnt = --c->free_cnt; *(struct bpf_mem_cache **)llnode = c; } } local_dec(&c->active); WARN_ON(cnt < 0); if (cnt < c->low_watermark) irq_work_raise(c); /* Enable IRQ after the enqueue of irq work completes, so irq work * will run after IRQ is enabled and free_llist may be refilled by * irq work before other task preempts current task. */ local_irq_restore(flags); return llnode; } /* Though 'ptr' object could have been allocated on a different cpu * add it to the free_llist of the current cpu. * Let kfree() logic deal with it when it's later called from irq_work. */ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr) { struct llist_node *llnode = ptr - LLIST_NODE_SZ; unsigned long flags; int cnt = 0; BUILD_BUG_ON(LLIST_NODE_SZ > 8); /* * Remember bpf_mem_cache that allocated this object. * The hint is not accurate. */ c->tgt = *(struct bpf_mem_cache **)llnode; local_irq_save(flags); if (local_inc_return(&c->active) == 1) { __llist_add(llnode, &c->free_llist); cnt = ++c->free_cnt; } else { /* unit_free() cannot fail. Therefore add an object to atomic * llist. free_bulk() will drain it. Though free_llist_extra is * a per-cpu list we have to use atomic llist_add here, since * it also can be interrupted by bpf nmi prog that does another * unit_free() into the same free_llist_extra. 
*/ llist_add(llnode, &c->free_llist_extra); } local_dec(&c->active); if (cnt > c->high_watermark) /* free few objects from current cpu into global kmalloc pool */ irq_work_raise(c); /* Enable IRQ after irq_work_raise() completes, otherwise when current * task is preempted by task which does unit_alloc(), unit_alloc() may * return NULL unexpectedly because irq work is already pending but can * not been triggered and free_llist can not be refilled timely. */ local_irq_restore(flags); } static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr) { struct llist_node *llnode = ptr - LLIST_NODE_SZ; unsigned long flags; c->tgt = *(struct bpf_mem_cache **)llnode; local_irq_save(flags); if (local_inc_return(&c->active) == 1) { if (__llist_add(llnode, &c->free_by_rcu)) c->free_by_rcu_tail = llnode; } else { llist_add(llnode, &c->free_llist_extra_rcu); } local_dec(&c->active); if (!atomic_read(&c->call_rcu_in_progress)) irq_work_raise(c); local_irq_restore(flags); } /* Called from BPF program or from sys_bpf syscall. * In both cases migration is disabled. */ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) { int idx; void *ret; if (!size) return NULL; if (!ma->percpu) size += LLIST_NODE_SZ; idx = bpf_mem_cache_idx(size); if (idx < 0) return NULL; ret = unit_alloc(this_cpu_ptr(ma->caches)->cache + idx); return !ret ? NULL : ret + LLIST_NODE_SZ; } void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) { struct bpf_mem_cache *c; int idx; if (!ptr) return; c = *(void **)(ptr - LLIST_NODE_SZ); idx = bpf_mem_cache_idx(c->unit_size); if (WARN_ON_ONCE(idx < 0)) return; unit_free(this_cpu_ptr(ma->caches)->cache + idx, ptr); } void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { struct bpf_mem_cache *c; int idx; if (!ptr) return; c = *(void **)(ptr - LLIST_NODE_SZ); idx = bpf_mem_cache_idx(c->unit_size); if (WARN_ON_ONCE(idx < 0)) return; unit_free_rcu(this_cpu_ptr(ma->caches)->cache + idx, ptr); } void notrace *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma) { void *ret; ret = unit_alloc(this_cpu_ptr(ma->cache)); return !ret ? NULL : ret + LLIST_NODE_SZ; } void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr) { if (!ptr) return; unit_free(this_cpu_ptr(ma->cache), ptr); } void notrace bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { if (!ptr) return; unit_free_rcu(this_cpu_ptr(ma->cache), ptr); } /* Directly does a kfree() without putting 'ptr' back to the free_llist * for reuse and without waiting for a rcu_tasks_trace gp. * The caller must first go through the rcu_tasks_trace gp for 'ptr' * before calling bpf_mem_cache_raw_free(). * It could be used when the rcu_tasks_trace callback does not have * a hold on the original bpf_mem_alloc object that allocated the * 'ptr'. This should only be used in the uncommon code path. * Otherwise, the bpf_mem_alloc's free_llist cannot be refilled * and may affect performance. */ void bpf_mem_cache_raw_free(void *ptr) { if (!ptr) return; kfree(ptr - LLIST_NODE_SZ); } /* When flags == GFP_KERNEL, it signals that the caller will not cause * deadlock when using kmalloc. bpf_mem_cache_alloc_flags() will use * kmalloc if the free_llist is empty. 
*/ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) { struct bpf_mem_cache *c; void *ret; c = this_cpu_ptr(ma->cache); ret = unit_alloc(c); if (!ret && flags == GFP_KERNEL) { struct mem_cgroup *memcg, *old_memcg; memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); if (ret) *(struct bpf_mem_cache **)ret = c; set_active_memcg(old_memcg); mem_cgroup_put(memcg); } return !ret ? NULL : ret + LLIST_NODE_SZ; } int bpf_mem_alloc_check_size(bool percpu, size_t size) { /* The size of percpu allocation doesn't have LLIST_NODE_SZ overhead */ if ((percpu && size > BPF_MEM_ALLOC_SIZE_MAX) || (!percpu && size > BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ)) return -E2BIG; return 0; }
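/*
 * Illustrative sketch added by the editor; this is not part of the kernel
 * sources.  unit_alloc()/unit_free() above hide one pointer-sized header in
 * front of every object and use it to record which bpf_mem_cache the object
 * belongs to, so bpf_mem_free() can recover the owning cache from the pointer
 * alone.  The standalone, single-threaded userspace sketch below shows only
 * that hidden-header pattern; all names here are hypothetical.
 */
#include <assert.h>
#include <stdlib.h>

struct demo_cache {
	size_t unit_size;	/* payload size handed out by this cache */
};

#define DEMO_HDR_SZ	sizeof(struct demo_cache *)

static void *demo_alloc(struct demo_cache *c)
{
	/* allocate header + payload and stash the owning cache in the header */
	char *mem = malloc(DEMO_HDR_SZ + c->unit_size);

	if (!mem)
		return NULL;
	*(struct demo_cache **)mem = c;
	return mem + DEMO_HDR_SZ;		/* caller only sees the payload */
}

static void demo_free(void *ptr)
{
	/* step back over the header to find out where the object came from */
	char *mem = (char *)ptr - DEMO_HDR_SZ;
	struct demo_cache *owner = *(struct demo_cache **)mem;

	assert(owner != NULL);
	free(mem);
}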
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext4/acl.c
 *
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 */

#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "acl.h"

/*
 * Convert from filesystem to in-memory representation.
 */
static struct posix_acl *
ext4_acl_from_disk(const void *value, size_t size)
{
	const char *end = (char *)value + size;
	int n, count;
	struct posix_acl *acl;

	if (!value)
		return NULL;
	if (size < sizeof(ext4_acl_header))
		return ERR_PTR(-EINVAL);
	if (((ext4_acl_header *)value)->a_version !=
	    cpu_to_le32(EXT4_ACL_VERSION))
		return ERR_PTR(-EINVAL);
	value = (char *)value + sizeof(ext4_acl_header);
	count = ext4_acl_count(size);
	if (count < 0)
		return ERR_PTR(-EINVAL);
	if (count == 0)
		return NULL;
	acl = posix_acl_alloc(count, GFP_NOFS);
	if (!acl)
		return ERR_PTR(-ENOMEM);
	for (n = 0; n < count; n++) {
		ext4_acl_entry *entry = (ext4_acl_entry *)value;

		if ((char *)value + sizeof(ext4_acl_entry_short) > end)
			goto fail;
		acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
		acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);

		switch (acl->a_entries[n].e_tag) {
		case ACL_USER_OBJ:
		case ACL_GROUP_OBJ:
		case ACL_MASK:
		case ACL_OTHER:
			value = (char *)value +
				sizeof(ext4_acl_entry_short);
			break;

		case ACL_USER:
			value = (char *)value + sizeof(ext4_acl_entry);
			if ((char *)value > end)
				goto fail;
			acl->a_entries[n].e_uid =
				make_kuid(&init_user_ns,
					  le32_to_cpu(entry->e_id));
			break;
		case ACL_GROUP:
			value = (char *)value + sizeof(ext4_acl_entry);
			if ((char *)value > end)
				goto fail;
			acl->a_entries[n].e_gid =
				make_kgid(&init_user_ns,
					  le32_to_cpu(entry->e_id));
			break;

		default:
			goto fail;
		}
	}
	if (value != end)
		goto fail;
	return acl;

fail:
	posix_acl_release(acl);
	return ERR_PTR(-EINVAL);
}

/*
 * Convert from in-memory to filesystem representation.
*/ static void * ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) { ext4_acl_header *ext_acl; char *e; size_t n; *size = ext4_acl_size(acl->a_count); ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * sizeof(ext4_acl_entry), GFP_NOFS); if (!ext_acl) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); e = (char *)ext_acl + sizeof(ext4_acl_header); for (n = 0; n < acl->a_count; n++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext4_acl_entry *entry = (ext4_acl_entry *)e; entry->e_tag = cpu_to_le16(acl_e->e_tag); entry->e_perm = cpu_to_le16(acl_e->e_perm); switch (acl_e->e_tag) { case ACL_USER: entry->e_id = cpu_to_le32( from_kuid(&init_user_ns, acl_e->e_uid)); e += sizeof(ext4_acl_entry); break; case ACL_GROUP: entry->e_id = cpu_to_le32( from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(ext4_acl_entry); break; case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: e += sizeof(ext4_acl_entry_short); break; default: goto fail; } } return (char *)ext_acl; fail: kfree(ext_acl); return ERR_PTR(-EINVAL); } /* * Inode operation get_posix_acl(). * * inode->i_rwsem: don't care */ struct posix_acl * ext4_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; if (rcu) return ERR_PTR(-ECHILD); switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; break; default: BUG(); } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { value = kmalloc(retval, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); retval = ext4_xattr_get(inode, name_index, "", value, retval); } if (retval > 0) acl = ext4_acl_from_disk(value, retval); else if (retval == -ENODATA || retval == -ENOSYS) acl = NULL; else acl = ERR_PTR(retval); kfree(value); return acl; } /* * Set the access or default ACL of an inode. * * inode->i_rwsem: down unless called from ext4_new_inode */ static int __ext4_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl, int xattr_flags) { int name_index; void *value = NULL; size_t size = 0; int error; switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; default: return -EINVAL; } if (acl) { value = ext4_acl_to_disk(acl, &size); if (IS_ERR(value)) return (int)PTR_ERR(value); } error = ext4_xattr_set_handle(handle, inode, name_index, "", value, size, xattr_flags); kfree(value); if (!error) set_cached_acl(inode, type, acl); return error; } int ext4_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { handle_t *handle; int error, credits, retries = 0; size_t acl_size = acl ? 
ext4_acl_size(acl->a_count) : 0; struct inode *inode = d_inode(dentry); umode_t mode = inode->i_mode; int update_mode = 0; error = dquot_initialize(inode); if (error) return error; retry: error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */, &credits); if (error) return error; handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) return PTR_ERR(handle); if ((type == ACL_TYPE_ACCESS) && acl) { error = posix_acl_update_mode(idmap, inode, &mode, &acl); if (error) goto out_stop; if (mode != inode->i_mode) update_mode = 1; } error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); if (!error && update_mode) { inode->i_mode = mode; inode_set_ctime_current(inode); error = ext4_mark_inode_dirty(handle, inode); } out_stop: ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; return error; } /* * Initialize the ACLs of a new inode. Called from ext4_new_inode. * * dir->i_rwsem: down * inode->i_rwsem: up (access to inode is still exclusive) */ int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { struct posix_acl *default_acl, *acl; int error; error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (error) return error; if (default_acl) { error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT, default_acl, XATTR_CREATE); posix_acl_release(default_acl); } else { inode->i_default_acl = NULL; } if (acl) { if (!error) error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl, XATTR_CREATE); posix_acl_release(acl); } else { inode->i_acl = NULL; } return error; }
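/*
 * Illustrative sketch added by the editor; this is not part of the kernel
 * sources.  ext4_acl_from_disk() above walks a packed buffer of variable-size
 * records: the tag of each entry decides whether it is a short entry or a
 * full entry carrying an id, and every advance is bounds-checked against the
 * end of the buffer before the entry body is trusted.  The standalone sketch
 * below shows only that walk; the record layout and the tag threshold are
 * hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

struct rec_short { uint16_t tag; uint16_t perm; };
struct rec_full  { uint16_t tag; uint16_t perm; uint32_t id; };

#define TAG_NEEDS_ID	0x10	/* hypothetical: tags >= this carry an id */

static int walk_records(const void *buf, size_t size)
{
	const char *p = buf, *end = p + size;

	while (p < end) {
		const struct rec_full *r = (const void *)p;
		size_t len;

		/* the short header must fit before the tag can be read */
		if (p + sizeof(struct rec_short) > end)
			return -1;
		len = r->tag >= TAG_NEEDS_ID ? sizeof(struct rec_full)
					     : sizeof(struct rec_short);
		/* and the whole entry must fit before it is consumed */
		if (p + len > end)
			return -1;
		p += len;
	}
	return 0;	/* p == end: the buffer was consumed exactly */
}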
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" #include "xfs_attr_leaf.h" #include "xfs_types.h" #include "xfs_errortag.h" #include "xfs_health.h" #include "xfs_symlink_remote.h" #include "xfs_rtrmap_btree.h" #include "xfs_rtrefcount_btree.h" struct kmem_cache *xfs_ifork_cache; void xfs_init_local_fork( struct xfs_inode *ip, int whichfork, const void *data, int64_t size) { struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); int mem_size = size; bool zero_terminate; /* * If we are using the local fork to store a symlink body we need to * zero-terminate it so that we can pass it back to the VFS directly. * Overallocate the in-memory fork by one for that and add a zero * to terminate it below. */ zero_terminate = S_ISLNK(VFS_I(ip)->i_mode); if (zero_terminate) mem_size++; if (size) { char *new_data = kmalloc(mem_size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); memcpy(new_data, data, size); if (zero_terminate) new_data[size] = '\0'; ifp->if_data = new_data; } else { ifp->if_data = NULL; } ifp->if_bytes = size; } /* * The file is in-lined in the on-disk inode. */ STATIC int xfs_iformat_local( struct xfs_inode *ip, struct xfs_dinode *dip, int whichfork, int size) { /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in * kmalloc() or memcpy() below. */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %llu (bad size %d for local fork, size = %zd).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_local", dip, sizeof(*dip), __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } /* * The file consists of a set of extents all of which fit into the on-disk * inode. */ STATIC int xfs_iformat_extents( struct xfs_inode *ip, struct xfs_dinode *dip, int whichfork) { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); int state = xfs_bmap_fork_to_state(whichfork); xfs_extnum_t nex = xfs_dfork_nextents(dip, whichfork); int size = nex * sizeof(xfs_bmbt_rec_t); struct xfs_iext_cursor icur; struct xfs_bmbt_rec *dp; struct xfs_bmbt_irec new; int i; /* * If the number of extents is unreasonable, then something is wrong and * we just bail out rather than crash in kmalloc() or memcpy() below. 
*/ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %llu ((a)extents = %llu).", ip->i_ino, nex); xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(1)", dip, sizeof(*dip), __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } ifp->if_bytes = 0; ifp->if_data = NULL; ifp->if_height = 0; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); xfs_iext_first(ifp, &icur); for (i = 0; i < nex; i++, dp++) { xfs_failaddr_t fa; xfs_bmbt_disk_get_all(dp, &new); fa = xfs_bmap_validate_extent(ip, whichfork, &new); if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(2)", dp, sizeof(*dp), fa); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return xfs_bmap_complain_bad_rec(ip, whichfork, fa, &new); } xfs_iext_insert(ip, &icur, &new, state); trace_xfs_read_extent(ip, &icur, state, _THIS_IP_); xfs_iext_next(ifp, &icur); } } return 0; } /* * The file has too many extents to fit into * the inode, so they are in B-tree format. * Allocate a buffer for the root of the B-tree * and copy the root into it. The i_extents * field will remain NULL until all of the * extents are read in (when they are needed). */ STATIC int xfs_iformat_btree( struct xfs_inode *ip, struct xfs_dinode *dip, int whichfork) { struct xfs_mount *mp = ip->i_mount; xfs_bmdr_block_t *dfp; struct xfs_ifork *ifp; struct xfs_btree_block *broot; int nrecs; int size; int level; ifp = xfs_ifork_ptr(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = xfs_bmap_broot_space(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); level = be16_to_cpu(dfp->bb_level); /* * blow out if -- fork has less extents than can fit in * fork (fork shouldn't be a btree format), root btree * block has more records than can fit into the fork, * or the number of extents is greater than the number of * blocks. */ if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) || nrecs == 0 || xfs_bmdr_space_calc(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || ifp->if_nextents > ip->i_nblocks) || level == 0 || level > XFS_BM_MAXLEVELS(mp, whichfork)) { xfs_warn(mp, "corrupt inode %llu (btree).", (unsigned long long) ip->i_ino); xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_btree", dfp, size, __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } broot = xfs_broot_alloc(ifp, size); /* * Copy and convert from the on-disk structure * to the in-memory structure. */ xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), broot, size); ifp->if_bytes = 0; ifp->if_data = NULL; ifp->if_height = 0; return 0; } int xfs_iformat_data_fork( struct xfs_inode *ip, struct xfs_dinode *dip) { struct inode *inode = VFS_I(ip); int error; /* * Initialize the extent count early, as the per-format routines may * depend on it. Use release semantics to set needextents /after/ we * set the format. This ensures that we can use acquire semantics on * needextents in xfs_need_iread_extents() and be guaranteed to see a * valid format value after that load. */ ip->i_df.if_format = dip->di_format; ip->i_df.if_nextents = xfs_dfork_data_extents(dip); smp_store_release(&ip->i_df.if_needextents, ip->i_df.if_format == XFS_DINODE_FMT_BTREE ? 
1 : 0); switch (inode->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: ip->i_disk_size = 0; inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); return 0; case S_IFREG: case S_IFLNK: case S_IFDIR: switch (ip->i_df.if_format) { case XFS_DINODE_FMT_LOCAL: error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, be64_to_cpu(dip->di_size)); if (!error) error = xfs_ifork_verify_local_data(ip); return error; case XFS_DINODE_FMT_EXTENTS: return xfs_iformat_extents(ip, dip, XFS_DATA_FORK); case XFS_DINODE_FMT_BTREE: return xfs_iformat_btree(ip, dip, XFS_DATA_FORK); case XFS_DINODE_FMT_META_BTREE: switch (ip->i_metatype) { case XFS_METAFILE_RTRMAP: return xfs_iformat_rtrmap(ip, dip); case XFS_METAFILE_RTREFCOUNT: return xfs_iformat_rtrefcount(ip, dip); default: break; } fallthrough; default: xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } break; default: xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } } static uint16_t xfs_dfork_attr_shortform_size( struct xfs_dinode *dip) { struct xfs_attr_sf_hdr *sf = XFS_DFORK_APTR(dip); return be16_to_cpu(sf->totsize); } void xfs_ifork_init_attr( struct xfs_inode *ip, enum xfs_dinode_fmt format, xfs_extnum_t nextents) { /* * Initialize the extent count early, as the per-format routines may * depend on it. Use release semantics to set needextents /after/ we * set the format. This ensures that we can use acquire semantics on * needextents in xfs_need_iread_extents() and be guaranteed to see a * valid format value after that load. */ ip->i_af.if_format = format; ip->i_af.if_nextents = nextents; smp_store_release(&ip->i_af.if_needextents, ip->i_af.if_format == XFS_DINODE_FMT_BTREE ? 1 : 0); } void xfs_ifork_zap_attr( struct xfs_inode *ip) { xfs_idestroy_fork(&ip->i_af); memset(&ip->i_af, 0, sizeof(struct xfs_ifork)); ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS; } int xfs_iformat_attr_fork( struct xfs_inode *ip, struct xfs_dinode *dip) { xfs_extnum_t naextents = xfs_dfork_attr_extents(dip); int error = 0; /* * Initialize the extent count early, as the per-format routines may * depend on it. */ xfs_ifork_init_attr(ip, dip->di_aformat, naextents); switch (ip->i_af.if_format) { case XFS_DINODE_FMT_LOCAL: error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, xfs_dfork_attr_shortform_size(dip)); if (!error) error = xfs_ifork_verify_local_attr(ip); break; case XFS_DINODE_FMT_EXTENTS: error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); break; case XFS_DINODE_FMT_BTREE: error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); break; default: xfs_inode_verifier_error(ip, error, __func__, dip, sizeof(*dip), __this_address); xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); error = -EFSCORRUPTED; break; } if (error) xfs_ifork_zap_attr(ip); return error; } /* * Allocate the if_broot component of an inode fork so that it is @new_size * bytes in size, using __GFP_NOLOCKDEP like all the other code that * initializes a broot during inode load. Returns if_broot. */ struct xfs_btree_block * xfs_broot_alloc( struct xfs_ifork *ifp, size_t new_size) { ASSERT(ifp->if_broot == NULL); ifp->if_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); ifp->if_broot_bytes = new_size; return ifp->if_broot; } /* * Reallocate the if_broot component of an inode fork so that it is @new_size * bytes in size. Returns if_broot. 
*/ struct xfs_btree_block * xfs_broot_realloc( struct xfs_ifork *ifp, size_t new_size) { /* No size change? No action needed. */ if (new_size == ifp->if_broot_bytes) return ifp->if_broot; /* New size is zero, free it. */ if (new_size == 0) { ifp->if_broot_bytes = 0; kfree(ifp->if_broot); ifp->if_broot = NULL; return NULL; } /* * Shrinking the iroot means we allocate a new smaller object and copy * it. We don't trust krealloc not to nop on realloc-down. */ if (ifp->if_broot_bytes > 0 && ifp->if_broot_bytes > new_size) { struct xfs_btree_block *old_broot = ifp->if_broot; ifp->if_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = new_size; memcpy(ifp->if_broot, old_broot, new_size); kfree(old_broot); return ifp->if_broot; } /* * Growing the iroot means we can krealloc. This may get us the same * object. */ ifp->if_broot = krealloc(ifp->if_broot, new_size, GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = new_size; return ifp->if_broot; } /* * This is called when the amount of space needed for if_data * is increased or decreased. The change in size is indicated by * the number of bytes that need to be added or deleted in the * byte_diff parameter. * * If the amount of space needed has decreased below the size of the * inline buffer, then switch to using the inline buffer. Otherwise, * use krealloc() or kmalloc() to adjust the size of the buffer * to what is needed. * * ip -- the inode whose if_data area is changing * byte_diff -- the change in the number of bytes, positive or negative, * requested for the if_data array. */ void * xfs_idata_realloc( struct xfs_inode *ip, int64_t byte_diff, int whichfork) { struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); int64_t new_size = ifp->if_bytes + byte_diff; ASSERT(new_size >= 0); ASSERT(new_size <= xfs_inode_fork_size(ip, whichfork)); if (byte_diff) { ifp->if_data = krealloc(ifp->if_data, new_size, GFP_KERNEL | __GFP_NOFAIL); if (new_size == 0) ifp->if_data = NULL; ifp->if_bytes = new_size; } return ifp->if_data; } /* Free all memory and reset a fork back to its initial state. */ void xfs_idestroy_fork( struct xfs_ifork *ifp) { if (ifp->if_broot != NULL) { kfree(ifp->if_broot); ifp->if_broot = NULL; } switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: kfree(ifp->if_data); ifp->if_data = NULL; break; case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: if (ifp->if_height) xfs_iext_destroy(ifp); break; } } /* * Convert in-core extents to on-disk form * * In the case of the data fork, the in-core and on-disk fork sizes can be * different due to delayed allocation extents. We only copy on-disk extents * here, so callers must always use the physical fork size to determine the * size of the buffer passed to this routine. We will return the size actually * used. 
*/ int xfs_iextents_copy( struct xfs_inode *ip, struct xfs_bmbt_rec *dp, int whichfork) { int state = xfs_bmap_fork_to_state(whichfork); struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_iext_cursor icur; struct xfs_bmbt_irec rec; int64_t copied = 0; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED); ASSERT(ifp->if_bytes > 0); for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue; ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL); xfs_bmbt_disk_set_all(dp, &rec); trace_xfs_write_extent(ip, &icur, state, _RET_IP_); copied += sizeof(struct xfs_bmbt_rec); dp++; } ASSERT(copied > 0); ASSERT(copied <= ifp->if_bytes); return copied; } /* * Each of the following cases stores data into the same region * of the on-disk inode, so only one of them can be valid at * any given time. While it is possible to have conflicting formats * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is * in EXTENTS format, this can only happen when the fork has * changed formats after being modified but before being flushed. * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ void xfs_iflush_fork( struct xfs_inode *ip, struct xfs_dinode *dip, struct xfs_inode_log_item *iip, int whichfork) { char *cp; struct xfs_ifork *ifp; xfs_mount_t *mp; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; static const short extflag[2] = { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) return; ifp = xfs_ifork_ptr(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, * for the attribute fork. */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); return; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_data != NULL); ASSERT(ifp->if_bytes <= xfs_inode_fork_size(ip, whichfork)); memcpy(cp, ifp->if_data, ifp->if_bytes); } break; case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_nextents > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); } break; case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, XFS_DFORK_SIZE(dip, mp, whichfork)); } break; case XFS_DINODE_FMT_DEV: if (iip->ili_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); xfs_dinode_put_rdev(dip, linux_to_xfs_dev_t(VFS_I(ip)->i_rdev)); } break; case XFS_DINODE_FMT_META_BTREE: ASSERT(whichfork == XFS_DATA_FORK); if (!(iip->ili_fields & brootflag[whichfork])) break; switch (ip->i_metatype) { case XFS_METAFILE_RTRMAP: xfs_iflush_rtrmap(ip, dip); break; case XFS_METAFILE_RTREFCOUNT: xfs_iflush_rtrefcount(ip, dip); break; default: ASSERT(0); break; } break; default: ASSERT(0); break; } } /* Convert bmap state flags to an inode fork. */ struct xfs_ifork * xfs_iext_state_to_fork( struct xfs_inode *ip, int state) { if (state & BMAP_COWFORK) return ip->i_cowfp; else if (state & BMAP_ATTRFORK) return &ip->i_af; return &ip->i_df; } /* * Initialize an inode's copy-on-write fork. 
*/ void xfs_ifork_init_cow( struct xfs_inode *ip) { if (ip->i_cowfp) return; ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } /* Verify the inline contents of the data fork of an inode. */ int xfs_ifork_verify_local_data( struct xfs_inode *ip) { xfs_failaddr_t fa = NULL; switch (VFS_I(ip)->i_mode & S_IFMT) { case S_IFDIR: { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); struct xfs_dir2_sf_hdr *sfp = ifp->if_data; fa = xfs_dir2_sf_verify(mp, sfp, ifp->if_bytes); break; } case S_IFLNK: { struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); fa = xfs_symlink_shortform_verify(ifp->if_data, ifp->if_bytes); break; } default: break; } if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", ip->i_df.if_data, ip->i_df.if_bytes, fa); return -EFSCORRUPTED; } return 0; } /* Verify the inline contents of the attr fork of an inode. */ int xfs_ifork_verify_local_attr( struct xfs_inode *ip) { struct xfs_ifork *ifp = &ip->i_af; xfs_failaddr_t fa; if (!xfs_inode_has_attr_fork(ip)) { fa = __this_address; } else { struct xfs_ifork *ifp = &ip->i_af; ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); fa = xfs_attr_shortform_verify(ifp->if_data, ifp->if_bytes); } if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", ifp->if_data, ifp->if_bytes, fa); return -EFSCORRUPTED; } return 0; } /* * Check if the inode fork supports adding nr_to_add more extents. * * If it doesn't but we can upgrade it to large extent counters, do the upgrade. * If we can't upgrade or are already using big counters but still can't fit the * additional extents, return -EFBIG. */ int xfs_iext_count_extend( struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, uint nr_to_add) { struct xfs_mount *mp = ip->i_mount; bool has_large = xfs_inode_has_large_extent_counts(ip); struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); uint64_t nr_exts; ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR); if (whichfork == XFS_COW_FORK) return 0; /* no point in upgrading if if_nextents overflows */ nr_exts = ifp->if_nextents + nr_to_add; if (nr_exts < ifp->if_nextents) return -EFBIG; if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && nr_exts > 10) return -EFBIG; if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) { if (has_large || !xfs_has_large_extent_counts(mp)) return -EFBIG; ip->i_diflags2 |= XFS_DIFLAG2_NREXT64; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } return 0; } /* Decide if a file mapping is on the realtime device or not. */ bool xfs_ifork_is_realtime( struct xfs_inode *ip, int whichfork) { return XFS_IS_REALTIME_INODE(ip) && whichfork != XFS_ATTR_FORK; }
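/*
 * Illustrative sketch added by the editor; this is not part of the kernel
 * sources.  xfs_broot_realloc() above grows the broot buffer with krealloc()
 * but shrinks it by allocating a fresh, smaller buffer, copying, and freeing
 * the old one, because a plain realloc-down is not trusted to actually return
 * memory.  The standalone userspace sketch below shows that shrink-vs-grow
 * policy; the helper name is hypothetical.
 */
#include <stdlib.h>
#include <string.h>

static void *resize_buf(void *buf, size_t old_size, size_t new_size)
{
	void *tmp;

	if (new_size == old_size)
		return buf;			/* nothing to do */

	if (new_size == 0) {
		free(buf);			/* shrink to nothing: just free */
		return NULL;
	}

	if (buf && new_size < old_size) {
		/* shrink: allocate smaller, copy, then free the old buffer */
		tmp = malloc(new_size);
		if (!tmp)
			return buf;		/* keep the old buffer on failure */
		memcpy(tmp, buf, new_size);
		free(buf);
		return tmp;
	}

	/* grow (or first allocation): realloc may move or extend in place */
	tmp = realloc(buf, new_size);
	return tmp ? tmp : buf;
}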
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_icache.h"
#include "xfs_health.h"
#include "xfs_trans.h"

/*
 * Bulk Stat
 * =========
 *
 * Use the inode walking functions to fill out struct xfs_bulkstat for every
 * allocated inode, then pass the stat information to some externally provided
 * iteration function.
 */

struct xfs_bstat_chunk {
	bulkstat_one_fmt_pf	formatter;
	struct xfs_ibulk	*breq;
	struct xfs_bulkstat	*buf;
};

static inline bool
want_metadir_file(
	struct xfs_inode	*ip,
	struct xfs_ibulk	*breq)
{
	return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR);
}

/*
 * Fill out the bulkstat info for a single inode and report it somewhere.
 *
 * bc->breq->lastino is effectively the inode cursor as we walk through the
 * filesystem.  Therefore, we update it any time we need to move the cursor
 * forward, regardless of whether or not we're sending any bstat information
 * back to userspace.  If the inode is internal metadata, or has been freed
 * out from under us, we simply keep going.
* * However, if any other type of error happens we want to stop right where we * are so that userspace will call back with exact number of the bad inode and * we can send back an error code. * * Note that if the formatter tells us there's no space left in the buffer we * move the cursor forward and abort the walk. */ STATIC int xfs_bulkstat_one_int( struct xfs_mount *mp, struct mnt_idmap *idmap, struct xfs_trans *tp, xfs_ino_t ino, struct xfs_bstat_chunk *bc) { struct user_namespace *sb_userns = mp->m_super->s_user_ns; struct xfs_inode *ip; /* incore inode pointer */ struct inode *inode; struct xfs_bulkstat *buf = bc->buf; xfs_extnum_t nextents; int error = -EINVAL; vfsuid_t vfsuid; vfsgid_t vfsgid; error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); if (error == -ENOENT || error == -EINVAL) goto out_advance; if (error) goto out; /* Reload the incore unlinked list to avoid failure in inodegc. */ if (xfs_inode_unlinked_incomplete(ip)) { error = xfs_inode_reload_unlinked_bucket(tp, ip); if (error) { xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_irele(ip); return error; } } ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); inode = VFS_I(ip); vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); /* * If caller wants files from the metadata directories, push out the * bare minimum information for enabling scrub. */ if (want_metadir_file(ip, bc->breq)) { memset(buf, 0, sizeof(*buf)); buf->bs_ino = ino; buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode & S_IFMT; xfs_bulkstat_health(ip, buf); buf->bs_version = XFS_BULKSTAT_VERSION_V5; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); error = bc->formatter(bc->breq, buf); if (!error || error == -ECANCELED) goto out_advance; goto out; } /* If this is a private inode, don't leak its details to userspace. */ if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); error = -EINVAL; goto out_advance; } /* xfs_iget returns the following without needing * further change. 
*/ buf->bs_projectid = ip->i_projid; buf->bs_ino = ino; buf->bs_uid = from_kuid(sb_userns, vfsuid_into_kuid(vfsuid)); buf->bs_gid = from_kgid(sb_userns, vfsgid_into_kgid(vfsgid)); buf->bs_size = ip->i_disk_size; buf->bs_nlink = inode->i_nlink; buf->bs_atime = inode_get_atime_sec(inode); buf->bs_atime_nsec = inode_get_atime_nsec(inode); buf->bs_mtime = inode_get_mtime_sec(inode); buf->bs_mtime_nsec = inode_get_mtime_nsec(inode); buf->bs_ctime = inode_get_ctime_sec(inode); buf->bs_ctime_nsec = inode_get_ctime_nsec(inode); buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode; buf->bs_xflags = xfs_ip2xflags(ip); buf->bs_extsize_blks = ip->i_extsize; nextents = xfs_ifork_nextents(&ip->i_df); if (!(bc->breq->flags & XFS_IBULK_NREXT64)) buf->bs_extents = min(nextents, XFS_MAX_EXTCNT_DATA_FORK_SMALL); else buf->bs_extents64 = nextents; xfs_bulkstat_health(ip, buf); buf->bs_aextents = xfs_ifork_nextents(&ip->i_af); buf->bs_forkoff = xfs_inode_fork_boff(ip); buf->bs_version = XFS_BULKSTAT_VERSION_V5; if (xfs_has_v3inodes(mp)) { buf->bs_btime = ip->i_crtime.tv_sec; buf->bs_btime_nsec = ip->i_crtime.tv_nsec; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) buf->bs_cowextsize_blks = ip->i_cowextsize; } switch (ip->i_df.if_format) { case XFS_DINODE_FMT_DEV: buf->bs_rdev = sysv_encode_dev(inode->i_rdev); buf->bs_blksize = BLKDEV_IOSIZE; buf->bs_blocks = 0; break; case XFS_DINODE_FMT_LOCAL: buf->bs_rdev = 0; buf->bs_blksize = mp->m_sb.sb_blocksize; buf->bs_blocks = 0; break; case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: buf->bs_rdev = 0; buf->bs_blksize = mp->m_sb.sb_blocksize; buf->bs_blocks = ip->i_nblocks + ip->i_delayed_blks; break; } xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); error = bc->formatter(bc->breq, buf); if (error == -ECANCELED) goto out_advance; if (error) goto out; out_advance: /* * Advance the cursor to the inode that comes after the one we just * looked at. We want the caller to move along if the bulkstat * information was copied successfully; if we tried to grab the inode * but it's no longer allocated; or if it's internal metadata. */ bc->breq->startino = ino + 1; out: return error; } /* Bulkstat a single inode. */ int xfs_bulkstat_one( struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter) { struct xfs_bstat_chunk bc = { .formatter = formatter, .breq = breq, }; struct xfs_trans *tp; int error; if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; } ASSERT(breq->icount == 1); bc.buf = kzalloc(sizeof(struct xfs_bulkstat), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!bc.buf) return -ENOMEM; /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ error = xfs_trans_alloc_empty(breq->mp, &tp); if (error) goto out; error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, breq->startino, &bc); xfs_trans_cancel(tp); out: kfree(bc.buf); /* * If we reported one inode to userspace then we abort because we hit * the end of the buffer. Don't leak that back to userspace. */ if (error == -ECANCELED) error = 0; return error; } static int xfs_bulkstat_iwalk( struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, void *data) { struct xfs_bstat_chunk *bc = data; int error; error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data); /* bulkstat just skips over missing inodes */ if (error == -ENOENT || error == -EINVAL) return 0; return error; } /* * Check the incoming lastino parameter. 
* * We allow any inode value that could map to physical space inside the * filesystem because if there are no inodes there, bulkstat moves on to the * next chunk. In other words, the magic agino value of zero takes us to the * first chunk in the AG, and an agino value past the end of the AG takes us to * the first chunk in the next AG. * * Therefore we can end early if the requested inode is beyond the end of the * filesystem or doesn't map properly. */ static inline bool xfs_bulkstat_already_done( struct xfs_mount *mp, xfs_ino_t startino) { xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); return agno >= mp->m_sb.sb_agcount || startino != XFS_AGINO_TO_INO(mp, agno, agino); } /* Return stat information in bulk (by-inode) for the filesystem. */ int xfs_bulkstat( struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter) { struct xfs_bstat_chunk bc = { .formatter = formatter, .breq = breq, }; struct xfs_trans *tp; unsigned int iwalk_flags = 0; int error; if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; } if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; bc.buf = kzalloc(sizeof(struct xfs_bulkstat), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!bc.buf) return -ENOMEM; /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ error = xfs_trans_alloc_empty(breq->mp, &tp); if (error) goto out; if (breq->flags & XFS_IBULK_SAME_AG) iwalk_flags |= XFS_IWALK_SAME_AG; error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, xfs_bulkstat_iwalk, breq->icount, &bc); xfs_trans_cancel(tp); out: kfree(bc.buf); /* * We found some inodes, so clear the error status and return them. * The lastino pointer will point directly at the inode that triggered * any error that occurred, so on the next call the error will be * triggered again and propagated to userspace as there will be no * formatted inodes in the buffer. */ if (breq->ocount > 0) error = 0; return error; } /* Convert bulkstat (v5) to bstat (v1). */ void xfs_bulkstat_to_bstat( struct xfs_mount *mp, struct xfs_bstat *bs1, const struct xfs_bulkstat *bstat) { /* memset is needed here because of padding holes in the structure. 
*/ memset(bs1, 0, sizeof(struct xfs_bstat)); bs1->bs_ino = bstat->bs_ino; bs1->bs_mode = bstat->bs_mode; bs1->bs_nlink = bstat->bs_nlink; bs1->bs_uid = bstat->bs_uid; bs1->bs_gid = bstat->bs_gid; bs1->bs_rdev = bstat->bs_rdev; bs1->bs_blksize = bstat->bs_blksize; bs1->bs_size = bstat->bs_size; bs1->bs_atime.tv_sec = bstat->bs_atime; bs1->bs_mtime.tv_sec = bstat->bs_mtime; bs1->bs_ctime.tv_sec = bstat->bs_ctime; bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; bs1->bs_blocks = bstat->bs_blocks; bs1->bs_xflags = bstat->bs_xflags; bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); bs1->bs_extents = bstat->bs_extents; bs1->bs_gen = bstat->bs_gen; bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; bs1->bs_forkoff = bstat->bs_forkoff; bs1->bs_projid_hi = bstat->bs_projectid >> 16; bs1->bs_sick = bstat->bs_sick; bs1->bs_checked = bstat->bs_checked; bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); bs1->bs_dmevmask = 0; bs1->bs_dmstate = 0; bs1->bs_aextents = bstat->bs_aextents; } struct xfs_inumbers_chunk { inumbers_fmt_pf formatter; struct xfs_ibulk *breq; }; /* * INUMBERS * ======== * This is how we export inode btree records to userspace, so that XFS tools * can figure out where inodes are allocated. */ /* * Format the inode group structure and report it somewhere. * * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk * through the filesystem so we move it forward unless there was a runtime * error. If the formatter tells us the buffer is now full we also move the * cursor forward and abort the walk. */ STATIC int xfs_inumbers_walk( struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, const struct xfs_inobt_rec_incore *irec, void *data) { struct xfs_inumbers inogrp = { .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), .xi_alloccount = irec->ir_count - irec->ir_freecount, .xi_allocmask = ~irec->ir_free, .xi_version = XFS_INUMBERS_VERSION_V5, }; struct xfs_inumbers_chunk *ic = data; int error; error = ic->formatter(ic->breq, &inogrp); if (error && error != -ECANCELED) return error; ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + XFS_INODES_PER_CHUNK; return error; } /* * Return inode number table for the filesystem. */ int xfs_inumbers( struct xfs_ibulk *breq, inumbers_fmt_pf formatter) { struct xfs_inumbers_chunk ic = { .formatter = formatter, .breq = breq, }; struct xfs_trans *tp; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ error = xfs_trans_alloc_empty(breq->mp, &tp); if (error) goto out; error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); out: /* * We found some inode groups, so clear the error status and return * them. The lastino pointer will point directly at the inode that * triggered any error that occurred, so on the next call the error * will be triggered again and propagated to userspace as there will be * no formatted inode groups in the buffer. */ if (breq->ocount > 0) error = 0; return error; } /* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ void xfs_inumbers_to_inogrp( struct xfs_inogrp *ig1, const struct xfs_inumbers *ig) { /* memset is needed here because of padding holes in the structure. 
*/ memset(ig1, 0, sizeof(struct xfs_inogrp)); ig1->xi_startino = ig->xi_startino; ig1->xi_alloccount = ig->xi_alloccount; ig1->xi_allocmask = ig->xi_allocmask; }
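/*
 * Illustrative sketch added by the editor; this is not part of the kernel
 * sources.  xfs_bulkstat_one_int() above drives a resumable walk: the
 * formatter callback returns 0 to continue, -ECANCELED to say "item taken but
 * the buffer is now full", or a negative error.  The cursor (breq->startino)
 * is advanced in the first two cases only, so a later call resumes after the
 * last reported inode while a hard error is reported again for the same
 * inode.  The standalone sketch below mirrors that protocol with hypothetical
 * names and a plain integer sentinel in place of -ECANCELED.
 */
#define WALK_FULL	1	/* hypothetical stand-in for -ECANCELED */

struct walk_cursor {
	unsigned long next;	/* first item the next call should look at */
};

static int walk_items(struct walk_cursor *cur, unsigned long last,
		      int (*fmt)(unsigned long item, void *arg), void *arg)
{
	unsigned long item;

	for (item = cur->next; item <= last; item++) {
		int ret = fmt(item, arg);

		if (ret < 0)
			return ret;	/* hard error: cursor still points here */

		cur->next = item + 1;	/* item was handled: move past it */
		if (ret == WALK_FULL)
			return 0;	/* a full buffer is not an error */
	}
	return 0;
}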
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "disk_groups.h"
#include "sb-members.h"
#include "super-io.h"

#include <linux/sort.h>

static int group_cmp(const void *_l, const void *_r)
{
	const struct bch_disk_group *l = _l;
	const struct bch_disk_group *r = _r;

	return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) -
		(BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?:
		((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) -
		 (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?:
		strncmp(l->label, r->label, sizeof(l->label));
}

static int bch2_sb_disk_groups_validate(struct bch_sb *sb, struct bch_sb_field *f,
					enum bch_validate_flags flags,
					struct printbuf *err)
{
	struct bch_sb_field_disk_groups *groups =
		field_to_type(f, disk_groups);
	struct bch_disk_group *g, *sorted = NULL;
	unsigned nr_groups = disk_groups_nr(groups);
	unsigned i, len;
	int ret = 0;

	for (i = 0; i < sb->nr_devices; i++) {
		struct bch_member m = bch2_sb_member_get(sb, i);
		unsigned group_id;

		if (!BCH_MEMBER_GROUP(&m))
			continue;

		group_id = BCH_MEMBER_GROUP(&m) - 1;

		if (group_id >= nr_groups) {
prt_printf(err, "disk %u has invalid label %u (have %u)", i, group_id, nr_groups); return -BCH_ERR_invalid_sb_disk_groups; } if (BCH_GROUP_DELETED(&groups->entries[group_id])) { prt_printf(err, "disk %u has deleted label %u", i, group_id); return -BCH_ERR_invalid_sb_disk_groups; } } if (!nr_groups) return 0; for (i = 0; i < nr_groups; i++) { g = groups->entries + i; if (BCH_GROUP_DELETED(g)) continue; len = strnlen(g->label, sizeof(g->label)); if (!len) { prt_printf(err, "label %u empty", i); return -BCH_ERR_invalid_sb_disk_groups; } } sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); if (!sorted) return -BCH_ERR_ENOMEM_disk_groups_validate; memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); for (g = sorted; g + 1 < sorted + nr_groups; g++) if (!BCH_GROUP_DELETED(g) && !group_cmp(&g[0], &g[1])) { prt_printf(err, "duplicate label %llu.%.*s", BCH_GROUP_PARENT(g), (int) sizeof(g->label), g->label); ret = -BCH_ERR_invalid_sb_disk_groups; goto err; } err: kfree(sorted); return ret; } void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) { out->atomic++; rcu_read_lock(); struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); if (!g) goto out; for (unsigned i = 0; i < g->nr; i++) { if (i) prt_printf(out, " "); if (g->entries[i].deleted) { prt_printf(out, "[deleted]"); continue; } prt_printf(out, "[parent %d devs", g->entries[i].parent); for_each_member_device_rcu(c, ca, &g->entries[i].devs) prt_printf(out, " %s", ca->name); prt_printf(out, "]"); } out: rcu_read_unlock(); out->atomic--; } static void bch2_sb_disk_groups_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) { struct bch_sb_field_disk_groups *groups = field_to_type(f, disk_groups); struct bch_disk_group *g; unsigned nr_groups = disk_groups_nr(groups); for (g = groups->entries; g < groups->entries + nr_groups; g++) { if (g != groups->entries) prt_printf(out, " "); if (BCH_GROUP_DELETED(g)) prt_printf(out, "[deleted]"); else prt_printf(out, "[parent %llu name %s]", BCH_GROUP_PARENT(g), g->label); } } const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { .validate = bch2_sb_disk_groups_validate, .to_text = bch2_sb_disk_groups_to_text }; int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) { struct bch_sb_field_disk_groups *groups; struct bch_disk_groups_cpu *cpu_g, *old_g; unsigned i, g, nr_groups; lockdep_assert_held(&c->sb_lock); groups = bch2_sb_field_get(c->disk_sb.sb, disk_groups); nr_groups = disk_groups_nr(groups); if (!groups) return 0; cpu_g = kzalloc(struct_size(cpu_g, entries, nr_groups), GFP_KERNEL); if (!cpu_g) return -BCH_ERR_ENOMEM_disk_groups_to_cpu; cpu_g->nr = nr_groups; for (i = 0; i < nr_groups; i++) { struct bch_disk_group *src = &groups->entries[i]; struct bch_disk_group_cpu *dst = &cpu_g->entries[i]; dst->deleted = BCH_GROUP_DELETED(src); dst->parent = BCH_GROUP_PARENT(src); memcpy(dst->label, src->label, sizeof(dst->label)); } for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, i); struct bch_disk_group_cpu *dst; if (!bch2_member_alive(&m)) continue; g = BCH_MEMBER_GROUP(&m); while (g) { dst = &cpu_g->entries[g - 1]; __set_bit(i, dst->devs.d); g = dst->parent; } } old_g = rcu_dereference_protected(c->disk_groups, lockdep_is_held(&c->sb_lock)); rcu_assign_pointer(c->disk_groups, cpu_g); if (old_g) kfree_rcu(old_g, rcu); return 0; } const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target) { struct target 
t = target_decode(target); struct bch_devs_mask *devs; rcu_read_lock(); switch (t.type) { case TARGET_NULL: devs = NULL; break; case TARGET_DEV: { struct bch_dev *ca = t.dev < c->sb.nr_devices ? rcu_dereference(c->devs[t.dev]) : NULL; devs = ca ? &ca->self : NULL; break; } case TARGET_GROUP: { struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); devs = g && t.group < g->nr && !g->entries[t.group].deleted ? &g->entries[t.group].devs : NULL; break; } default: BUG(); } rcu_read_unlock(); return devs; } bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) { struct target t = target_decode(target); switch (t.type) { case TARGET_NULL: return false; case TARGET_DEV: return dev == t.dev; case TARGET_GROUP: { struct bch_disk_groups_cpu *g; const struct bch_devs_mask *m; bool ret; rcu_read_lock(); g = rcu_dereference(c->disk_groups); m = g && t.group < g->nr && !g->entries[t.group].deleted ? &g->entries[t.group].devs : NULL; ret = m ? test_bit(dev, m->d) : false; rcu_read_unlock(); return ret; } default: BUG(); } } static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups, unsigned parent, const char *name, unsigned namelen) { unsigned i, nr_groups = disk_groups_nr(groups); if (!namelen || namelen > BCH_SB_LABEL_SIZE) return -EINVAL; for (i = 0; i < nr_groups; i++) { struct bch_disk_group *g = groups->entries + i; if (BCH_GROUP_DELETED(g)) continue; if (!BCH_GROUP_DELETED(g) && BCH_GROUP_PARENT(g) == parent && strnlen(g->label, sizeof(g->label)) == namelen && !memcmp(name, g->label, namelen)) return i; } return -1; } static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent, const char *name, unsigned namelen) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb->sb, disk_groups); unsigned i, nr_groups = disk_groups_nr(groups); struct bch_disk_group *g; if (!namelen || namelen > BCH_SB_LABEL_SIZE) return -EINVAL; for (i = 0; i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]); i++) ; if (i == nr_groups) { unsigned u64s = (sizeof(struct bch_sb_field_disk_groups) + sizeof(struct bch_disk_group) * (nr_groups + 1)) / sizeof(u64); groups = bch2_sb_field_resize(sb, disk_groups, u64s); if (!groups) return -BCH_ERR_ENOSPC_disk_label_add; nr_groups = disk_groups_nr(groups); } BUG_ON(i >= nr_groups); g = &groups->entries[i]; memcpy(g->label, name, namelen); if (namelen < sizeof(g->label)) g->label[namelen] = '\0'; SET_BCH_GROUP_DELETED(g, 0); SET_BCH_GROUP_PARENT(g, parent); SET_BCH_GROUP_DATA_ALLOWED(g, ~0); return i; } int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb->sb, disk_groups); int v = -1; do { const char *next = strchrnul(name, '.'); unsigned len = next - name; if (*next == '.') next++; v = __bch2_disk_group_find(groups, v + 1, name, len); name = next; } while (*name && v >= 0); return v; } int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) { struct bch_sb_field_disk_groups *groups; unsigned parent = 0; int v = -1; do { const char *next = strchrnul(name, '.'); unsigned len = next - name; if (*next == '.') next++; groups = bch2_sb_field_get(sb->sb, disk_groups); v = __bch2_disk_group_find(groups, parent, name, len); if (v < 0) v = __bch2_disk_group_add(sb, parent, name, len); if (v < 0) return v; parent = v + 1; name = next; } while (*name && v >= 0); return v; } void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { struct bch_disk_groups_cpu *groups; struct bch_disk_group_cpu *g; 
unsigned nr = 0; u16 path[32]; out->atomic++; rcu_read_lock(); groups = rcu_dereference(c->disk_groups); if (!groups) goto invalid; while (1) { if (nr == ARRAY_SIZE(path)) goto invalid; if (v >= groups->nr) goto invalid; g = groups->entries + v; if (g->deleted) goto invalid; path[nr++] = v; if (!g->parent) break; v = g->parent - 1; } while (nr) { v = path[--nr]; g = groups->entries + v; prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); if (nr) prt_printf(out, "."); } out: rcu_read_unlock(); out->atomic--; return; invalid: prt_printf(out, "invalid label %u", v); goto out; } void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb, disk_groups); struct bch_disk_group *g; unsigned nr = 0; u16 path[32]; while (1) { if (nr == ARRAY_SIZE(path)) goto inval; if (v >= disk_groups_nr(groups)) goto inval; g = groups->entries + v; if (BCH_GROUP_DELETED(g)) goto inval; path[nr++] = v; if (!BCH_GROUP_PARENT(g)) break; v = BCH_GROUP_PARENT(g) - 1; } while (nr) { v = path[--nr]; g = groups->entries + v; prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); if (nr) prt_printf(out, "."); } return; inval: prt_printf(out, "invalid label %u", v); } int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { struct bch_member *mi; int ret, v = -1; if (!strlen(name) || !strcmp(name, "none")) return 0; v = bch2_disk_path_find_or_create(&c->disk_sb, name); if (v < 0) return v; ret = bch2_sb_disk_groups_to_cpu(c); if (ret) return ret; mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); SET_BCH_MEMBER_GROUP(mi, v + 1); return 0; } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { int ret; mutex_lock(&c->sb_lock); ret = __bch2_dev_group_set(c, ca, name) ?: bch2_write_super(c); mutex_unlock(&c->sb_lock); return ret; } int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, struct printbuf *err) { struct bch_dev *ca; int g; if (!val) return -EINVAL; if (!c) return -BCH_ERR_option_needs_open_fs; if (!strlen(val) || !strcmp(val, "none")) { *res = 0; return 0; } /* Is it a device? */ ca = bch2_dev_lookup(c, val); if (!IS_ERR(ca)) { *res = dev_to_target(ca->dev_idx); bch2_dev_put(ca); return 0; } mutex_lock(&c->sb_lock); g = bch2_disk_path_find(&c->disk_sb, val); mutex_unlock(&c->sb_lock); if (g >= 0) { *res = group_to_target(g); return 0; } return -EINVAL; } void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { struct target t = target_decode(v); switch (t.type) { case TARGET_NULL: prt_printf(out, "none"); break; case TARGET_DEV: { struct bch_dev *ca; out->atomic++; rcu_read_lock(); ca = t.dev < c->sb.nr_devices ? 
rcu_dereference(c->devs[t.dev]) : NULL; if (ca && percpu_ref_tryget(&ca->io_ref)) { prt_printf(out, "/dev/%s", ca->name); percpu_ref_put(&ca->io_ref); } else if (ca) { prt_printf(out, "offline device %u", t.dev); } else { prt_printf(out, "invalid device %u", t.dev); } rcu_read_unlock(); out->atomic--; break; } case TARGET_GROUP: bch2_disk_path_to_text(out, c, t.group); break; default: BUG(); } } static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct target t = target_decode(v); switch (t.type) { case TARGET_NULL: prt_printf(out, "none"); break; case TARGET_DEV: { struct bch_member m = bch2_sb_member_get(sb, t.dev); if (bch2_member_exists(sb, t.dev)) { prt_printf(out, "Device "); pr_uuid(out, m.uuid.b); prt_printf(out, " (%u)", t.dev); } else { prt_printf(out, "Bad device %u", t.dev); } break; } case TARGET_GROUP: bch2_disk_path_to_text_sb(out, sb, t.group); break; default: BUG(); } } void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, struct bch_sb *sb, u64 v) { if (c) bch2_target_to_text(out, c, v); else bch2_target_to_text_sb(out, sb, v); }
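/*
 * Illustrative userspace sketch (not part of bcachefs): it mimics the
 * dot-separated label walk done by bch2_disk_path_find() above, where each
 * path component is looked up by (parent, name) and a match at index v makes
 * v + 1 the parent for the next component.  The demo_groups table and the
 * demo_* helpers are invented for this example.
 *
 * Build: cc -O2 demo_disk_path.c -o demo_disk_path
 */
#define _GNU_SOURCE		/* strchrnul() */
#include <stdio.h>
#include <string.h>

struct demo_group { int parent; const char *label; };	/* hypothetical */

static const struct demo_group demo_groups[] = {
	{ 0, "ssd" },		/* index 0: top-level label "ssd"          */
	{ 1, "fast" },		/* index 1: "ssd.fast" (parent = 0 + 1)     */
	{ 0, "hdd" },		/* index 2: top-level label "hdd"          */
};

static int demo_group_find(int parent, const char *name, size_t len)
{
	for (size_t i = 0; i < sizeof(demo_groups) / sizeof(demo_groups[0]); i++)
		if (demo_groups[i].parent == parent &&
		    strlen(demo_groups[i].label) == len &&
		    !memcmp(demo_groups[i].label, name, len))
			return (int) i;
	return -1;
}

/* Same shape as bch2_disk_path_find(): v + 1 becomes the next parent. */
static int demo_path_find(const char *name)
{
	int v = -1;

	do {
		const char *next = strchrnul(name, '.');
		size_t len = next - name;

		if (*next == '.')
			next++;

		v = demo_group_find(v + 1, name, len);
		name = next;
	} while (*name && v >= 0);

	return v;
}

int main(void)
{
	printf("ssd.fast -> %d\n", demo_path_find("ssd.fast"));	/* 1  */
	printf("hdd.cold -> %d\n", demo_path_find("hdd.cold"));	/* -1 */
	return 0;
}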
// SPDX-License-Identifier: GPL-2.0
/*
 * Support for async notification of waitid
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "cancel.h"
#include "waitid.h"
#include "../kernel/exit.h"

static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts);

#define IO_WAITID_CANCEL_FLAG	BIT(31)
#define IO_WAITID_REF_MASK	GENMASK(30, 0)

struct io_waitid {
	struct file *file;
	int which;
	pid_t upid;
	int options;
	atomic_t refs;
	struct wait_queue_head *head;
	struct siginfo __user *infop;
	struct waitid_info info;
};

static void io_waitid_free(struct io_kiocb *req)
{
	struct io_waitid_async *iwa = req->async_data;

	put_pid(iwa->wo.wo_pid);
	kfree(req->async_data);
	req->async_data = NULL;
	req->flags &= ~REQ_F_ASYNC_DATA;
}

#ifdef CONFIG_COMPAT
static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
{
	struct compat_siginfo __user *infop;
	bool ret;

	infop = (struct compat_siginfo __user *) iw->infop;

	if (!user_write_access_begin(infop, sizeof(*infop)))
		return false;

	unsafe_put_user(signo, &infop->si_signo, Efault);
	unsafe_put_user(0, &infop->si_errno, Efault);
	unsafe_put_user(iw->info.cause, &infop->si_code, Efault);
	unsafe_put_user(iw->info.pid, &infop->si_pid, Efault);
	unsafe_put_user(iw->info.uid, &infop->si_uid, Efault);
	unsafe_put_user(iw->info.status, &infop->si_status, Efault);
	ret = true;
done:
	user_write_access_end();
	return ret;
Efault:
	ret = false;
	goto done;
}
#endif

static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
{
	struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
	bool ret;

	if (!iw->infop)
		return true;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return io_waitid_compat_copy_si(iw, signo);
#endif

	if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
		return false;

	unsafe_put_user(signo, &iw->infop->si_signo, Efault);
	unsafe_put_user(0, &iw->infop->si_errno, Efault);
	unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault);
	unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault);
unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault); unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault); ret = true; done: user_write_access_end(); return ret; Efault: ret = false; goto done; } static int io_waitid_finish(struct io_kiocb *req, int ret) { int signo = 0; if (ret > 0) { signo = SIGCHLD; ret = 0; } if (!io_waitid_copy_si(req, signo)) ret = -EFAULT; io_waitid_free(req); return ret; } static void io_waitid_complete(struct io_kiocb *req, int ret) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct io_tw_state ts = {}; /* anyone completing better be holding a reference */ WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK)); lockdep_assert_held(&req->ctx->uring_lock); hlist_del_init(&req->hash_node); ret = io_waitid_finish(req, ret); if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); io_req_task_complete(req, &ts); } static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct io_waitid_async *iwa = req->async_data; /* * Mark us canceled regardless of ownership. This will prevent a * potential retry from a spurious wakeup. */ atomic_or(IO_WAITID_CANCEL_FLAG, &iw->refs); /* claim ownership */ if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK) return false; spin_lock_irq(&iw->head->lock); list_del_init(&iwa->wo.child_wait.entry); spin_unlock_irq(&iw->head->lock); io_waitid_complete(req, -ECANCELED); return true; } int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags) { struct hlist_node *tmp; struct io_kiocb *req; int nr = 0; if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) return -ENOENT; io_ring_submit_lock(ctx, issue_flags); hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { if (req->cqe.user_data != cd->data && !(cd->flags & IORING_ASYNC_CANCEL_ANY)) continue; if (__io_waitid_cancel(ctx, req)) nr++; if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) break; } io_ring_submit_unlock(ctx, issue_flags); if (nr) return nr; return -ENOENT; } bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all) { struct hlist_node *tmp; struct io_kiocb *req; bool found = false; lockdep_assert_held(&ctx->uring_lock); hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { if (!io_match_task_safe(req, tctx, cancel_all)) continue; hlist_del_init(&req->hash_node); __io_waitid_cancel(ctx, req); found = true; } return found; } static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct io_waitid_async *iwa = req->async_data; if (!atomic_sub_return(1, &iw->refs)) return false; /* * Wakeup triggered, racing with us. It was prevented from * completing because of that, queue up the tw to do that. */ req->io_task_work.func = io_waitid_cb; io_req_task_work_add(req); remove_wait_queue(iw->head, &iwa->wo.child_wait); return true; } static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts) { struct io_waitid_async *iwa = req->async_data; struct io_ring_ctx *ctx = req->ctx; int ret; io_tw_lock(ctx, ts); ret = __do_wait(&iwa->wo); /* * If we get -ERESTARTSYS here, we need to re-arm and check again * to ensure we get another callback. If the retry works, then we can * just remove ourselves from the waitqueue again and finish the * request. 
*/ if (unlikely(ret == -ERESTARTSYS)) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); /* Don't retry if cancel found it meanwhile */ ret = -ECANCELED; if (!(atomic_read(&iw->refs) & IO_WAITID_CANCEL_FLAG)) { iw->head = &current->signal->wait_chldexit; add_wait_queue(iw->head, &iwa->wo.child_wait); ret = __do_wait(&iwa->wo); if (ret == -ERESTARTSYS) { /* retry armed, drop our ref */ io_waitid_drop_issue_ref(req); return; } remove_wait_queue(iw->head, &iwa->wo.child_wait); } } io_waitid_complete(req, ret); } static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait); struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo); struct io_kiocb *req = iwa->req; struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct task_struct *p = key; if (!pid_child_should_wake(wo, p)) return 0; /* cancel is in progress */ if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK) return 1; req->io_task_work.func = io_waitid_cb; io_req_task_work_add(req); list_del_init(&wait->entry); return 1; } int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags) return -EINVAL; iw->which = READ_ONCE(sqe->len); iw->upid = READ_ONCE(sqe->fd); iw->options = READ_ONCE(sqe->file_index); iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2)); return 0; } int io_waitid(struct io_kiocb *req, unsigned int issue_flags) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct io_ring_ctx *ctx = req->ctx; struct io_waitid_async *iwa; int ret; iwa = io_uring_alloc_async_data(NULL, req); if (!iwa) return -ENOMEM; iwa->req = req; ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info, iw->options, NULL); if (ret) goto done; /* * Mark the request as busy upfront, in case we're racing with the * wakeup. If we are, then we'll notice when we drop this initial * reference again after arming. */ atomic_set(&iw->refs, 1); /* * Cancel must hold the ctx lock, so there's no risk of cancelation * finding us until a) we remain on the list, and b) the lock is * dropped. We only need to worry about racing with the wakeup * callback. */ io_ring_submit_lock(ctx, issue_flags); hlist_add_head(&req->hash_node, &ctx->waitid_list); init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait); iwa->wo.child_wait.private = req->tctx->task; iw->head = &current->signal->wait_chldexit; add_wait_queue(iw->head, &iwa->wo.child_wait); ret = __do_wait(&iwa->wo); if (ret == -ERESTARTSYS) { /* * Nobody else grabbed a reference, it'll complete when we get * a waitqueue callback, or if someone cancels it. */ if (!io_waitid_drop_issue_ref(req)) { io_ring_submit_unlock(ctx, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } /* * Wakeup triggered, racing with us. It was prevented from * completing because of that, queue up the tw to do that. */ io_ring_submit_unlock(ctx, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } hlist_del_init(&req->hash_node); remove_wait_queue(iw->head, &iwa->wo.child_wait); ret = io_waitid_finish(req, ret); io_ring_submit_unlock(ctx, issue_flags); done: if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_OK; }
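/*
 * Userspace reference point (illustrative, not kernel code): the plain
 * waitid(2) call that the io_uring waitid opcode above mirrors.  The fields
 * printed here (si_signo == SIGCHLD, si_code, si_pid, si_status) are the same
 * ones io_waitid_copy_si() writes into the user-supplied siginfo.
 *
 * Build: cc -O2 demo_waitid.c -o demo_waitid
 */
#include <stdio.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0)		/* child: exit with a recognisable status */
		_exit(42);

	siginfo_t si = { 0 };
	if (waitid(P_PID, pid, &si, WEXITED) < 0) {
		perror("waitid");
		return 1;
	}

	printf("signo=%d code=%d pid=%d status=%d\n",
	       si.si_signo, si.si_code, (int) si.si_pid, si.si_status);
	return 0;
}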
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/plist.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>
#include <linux/freezer.h>

#include "futex.h"

/*
 * READ this before attempting to hack on futexes!
 *
 * Basic futex operation and ordering guarantees
 * =============================================
 *
 * The waiter reads the futex value in user space and calls
 * futex_wait(). This function computes the hash bucket and acquires
 * the hash bucket lock.
After that it reads the futex user space value * again and verifies that the data has not changed. If it has not changed * it enqueues itself into the hash bucket, releases the hash bucket lock * and schedules. * * The waker side modifies the user space value of the futex and calls * futex_wake(). This function computes the hash bucket and acquires the * hash bucket lock. Then it looks for waiters on that futex in the hash * bucket and wakes them. * * In futex wake up scenarios where no tasks are blocked on a futex, taking * the hb spinlock can be avoided and simply return. In order for this * optimization to work, ordering guarantees must exist so that the waiter * being added to the list is acknowledged when the list is concurrently being * checked by the waker, avoiding scenarios like the following: * * CPU 0 CPU 1 * val = *futex; * sys_futex(WAIT, futex, val); * futex_wait(futex, val); * uval = *futex; * *futex = newval; * sys_futex(WAKE, futex); * futex_wake(futex); * if (queue_empty()) * return; * if (uval == val) * lock(hash_bucket(futex)); * queue(); * unlock(hash_bucket(futex)); * schedule(); * * This would cause the waiter on CPU 0 to wait forever because it * missed the transition of the user space value from val to newval * and the waker did not find the waiter in the hash bucket queue. * * The correct serialization ensures that a waiter either observes * the changed user space value before blocking or is woken by a * concurrent waker: * * CPU 0 CPU 1 * val = *futex; * sys_futex(WAIT, futex, val); * futex_wait(futex, val); * * waiters++; (a) * smp_mb(); (A) <-- paired with -. * | * lock(hash_bucket(futex)); | * | * uval = *futex; | * | *futex = newval; * | sys_futex(WAKE, futex); * | futex_wake(futex); * | * `--------> smp_mb(); (B) * if (uval == val) * queue(); * unlock(hash_bucket(futex)); * schedule(); if (waiters) * lock(hash_bucket(futex)); * else wake_waiters(futex); * waiters--; (b) unlock(hash_bucket(futex)); * * Where (A) orders the waiters increment and the futex value read through * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write * to futex and the waiters read (see futex_hb_waiters_pending()). * * This yields the following case (where X:=waiters, Y:=futex): * * X = Y = 0 * * w[X]=1 w[Y]=1 * MB MB * r[Y]=y r[X]=x * * Which guarantees that x==0 && y==0 is impossible; which translates back into * the guarantee that we cannot both miss the futex variable change and the * enqueue. * * Note that a new waiter is accounted for in (a) even when it is possible that * the wait call can return error, in which case we backtrack from it in (b). * Refer to the comment in futex_q_lock(). * * Similarly, in order to account for waiters being requeued on another * address we always increment the waiters for the destination bucket before * acquiring the lock. It then decrements them again after releasing it - * the code that actually moves the futex(es) between hash buckets (requeue_futex) * will do the additional required waiter count housekeeping. This is done for * double_lock_hb() and double_unlock_hb(), respectively. */ bool __futex_wake_mark(struct futex_q *q) { if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) return false; __futex_unqueue(q); /* * The waiting task can free the futex_q as soon as q->lock_ptr = NULL * is written, without taking any locks. This is possible in the event * of a spurious wakeup, for example. 
A memory barrier is required here * to prevent the following store to lock_ptr from getting ahead of the * plist_del in __futex_unqueue(). */ smp_store_release(&q->lock_ptr, NULL); return true; } /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. Callers * must ensure to later call wake_up_q() for the actual * wakeups to occur. */ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q) { struct task_struct *p = q->task; get_task_struct(p); if (!__futex_wake_mark(q)) { put_task_struct(p); return; } /* * Queue the task for later wakeup for after we've released * the hb->lock. */ wake_q_add_safe(wake_q, p); } /* * Wake up waiters matching bitset queued on this futex (uaddr). */ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; DEFINE_WAKE_Q(wake_q); int ret; if (!bitset) return -EINVAL; ret = get_futex_key(uaddr, flags, &key, FUTEX_READ); if (unlikely(ret != 0)) return ret; if ((flags & FLAGS_STRICT) && !nr_wake) return 0; hb = futex_hash(&key); /* Make sure we really have tasks to wakeup */ if (!futex_hb_waiters_pending(hb)) return ret; spin_lock(&hb->lock); plist_for_each_entry_safe(this, next, &hb->chain, list) { if (futex_match (&this->key, &key)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } /* Check if one of the bits is set in both bitsets */ if (!(this->bitset & bitset)) continue; this->wake(&wake_q, this); if (++ret >= nr_wake) break; } } spin_unlock(&hb->lock); wake_up_q(&wake_q); return ret; } static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { unsigned int op = (encoded_op & 0x70000000) >> 28; unsigned int cmp = (encoded_op & 0x0f000000) >> 24; int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); int oldval, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { if (oparg < 0 || oparg > 31) { /* * kill this print and return -EINVAL when userspace * is sane again */ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n", current->comm, oparg); oparg &= 31; } oparg = 1 << oparg; } pagefault_disable(); ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); pagefault_enable(); if (ret) return ret; switch (cmp) { case FUTEX_OP_CMP_EQ: return oldval == cmparg; case FUTEX_OP_CMP_NE: return oldval != cmparg; case FUTEX_OP_CMP_LT: return oldval < cmparg; case FUTEX_OP_CMP_GE: return oldval >= cmparg; case FUTEX_OP_CMP_LE: return oldval <= cmparg; case FUTEX_OP_CMP_GT: return oldval > cmparg; default: return -ENOSYS; } } /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; int ret, op_ret; DEFINE_WAKE_Q(wake_q); retry: ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) return ret; hb1 = futex_hash(&key1); hb2 = futex_hash(&key2); retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { double_unlock_hb(hb1, hb2); if (!IS_ENABLED(CONFIG_MMU) || unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) 
{ /* * we don't get EFAULT from MMU faults if we don't have * an MMU, but we might get them from range checking */ ret = op_ret; return ret; } if (op_ret == -EFAULT) { ret = fault_in_user_writeable(uaddr2); if (ret) return ret; } cond_resched(); if (!(flags & FLAGS_SHARED)) goto retry_private; goto retry; } plist_for_each_entry_safe(this, next, &hb1->chain, list) { if (futex_match (&this->key, &key1)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; goto out_unlock; } this->wake(&wake_q, this); if (++ret >= nr_wake) break; } } if (op_ret > 0) { op_ret = 0; plist_for_each_entry_safe(this, next, &hb2->chain, list) { if (futex_match (&this->key, &key2)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; goto out_unlock; } this->wake(&wake_q, this); if (++op_ret >= nr_wake2) break; } } ret += op_ret; } out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); return ret; } static long futex_wait_restart(struct restart_block *restart); /** * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal * @hb: the futex hash bucket, must be locked by the caller * @q: the futex_q to queue up on * @timeout: the prepared hrtimer_sleeper, or null for no timeout */ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, struct hrtimer_sleeper *timeout) { /* * The task state is guaranteed to be set before another task can * wake it. set_current_state() is implemented using smp_store_mb() and * futex_queue() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); futex_queue(q, hb, current); /* Arm the timer */ if (timeout) hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); /* * If we have been removed from the hash list, then another task * has tried to wake us, and we can skip the call to schedule(). */ if (likely(!plist_node_empty(&q->list))) { /* * If the timer has already expired, current will already be * flagged for rescheduling. Only call schedule if there * is no timeout, or if it has yet to expire. */ if (!timeout || timeout->task) schedule(); } __set_current_state(TASK_RUNNING); } /** * futex_unqueue_multiple - Remove various futexes from their hash bucket * @v: The list of futexes to unqueue * @count: Number of futexes in the list * * Helper to unqueue a list of futexes. This can't fail. * * Return: * - >=0 - Index of the last futex that was awoken; * - -1 - No futex was awoken */ int futex_unqueue_multiple(struct futex_vector *v, int count) { int ret = -1, i; for (i = 0; i < count; i++) { if (!futex_unqueue(&v[i].q)) ret = i; } return ret; } /** * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes * @vs: The futex list to wait on * @count: The size of the list * @woken: Index of the last woken futex, if any. Used to notify the * caller that it can return this index to userspace (return parameter) * * Prepare multiple futexes in a single step and enqueue them. This may fail if * the futex list is invalid or if any futex was already awoken. On success the * task is ready to interruptible sleep. 
* * Return: * - 1 - One of the futexes was woken by another thread * - 0 - Success * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL */ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) { struct futex_hash_bucket *hb; bool retry = false; int ret, i; u32 uval; /* * Enqueuing multiple futexes is tricky, because we need to enqueue * each futex on the list before dealing with the next one to avoid * deadlocking on the hash bucket. But, before enqueuing, we need to * make sure that current->state is TASK_INTERRUPTIBLE, so we don't * lose any wake events, which cannot be done before the get_futex_key * of the next key, because it calls get_user_pages, which can sleep. * Thus, we fetch the list of futexes keys in two steps, by first * pinning all the memory keys in the futex key, and only then we read * each key and queue the corresponding futex. * * Private futexes doesn't need to recalculate hash in retry, so skip * get_futex_key() when retrying. */ retry: for (i = 0; i < count; i++) { if (!(vs[i].w.flags & FLAGS_SHARED) && retry) continue; ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), vs[i].w.flags, &vs[i].q.key, FUTEX_READ); if (unlikely(ret)) return ret; } set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); for (i = 0; i < count; i++) { u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; struct futex_q *q = &vs[i].q; u32 val = vs[i].w.val; hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); if (!ret && uval == val) { /* * The bucket lock can't be held while dealing with the * next futex. Queue each futex at this moment so hb can * be unlocked. */ futex_queue(q, hb, current); continue; } futex_q_unlock(hb); __set_current_state(TASK_RUNNING); /* * Even if something went wrong, if we find out that a futex * was woken, we don't return error and return this index to * userspace */ *woken = futex_unqueue_multiple(vs, i); if (*woken >= 0) return 1; if (ret) { /* * If we need to handle a page fault, we need to do so * without any lock and any enqueued futex (otherwise * we could lose some wakeup). So we do it here, after * undoing all the work done so far. In success, we * retry all the work. */ if (get_user(uval, uaddr)) return -EFAULT; retry = true; goto retry; } if (uval != val) return -EWOULDBLOCK; } return 0; } /** * futex_sleep_multiple - Check sleeping conditions and sleep * @vs: List of futexes to wait for * @count: Length of vs * @to: Timeout * * Sleep if and only if the timeout hasn't expired and no futex on the list has * been woken up. */ static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to) { if (to && !to->task) return; for (; count; count--, vs++) { if (!READ_ONCE(vs->q.lock_ptr)) return; } schedule(); } /** * futex_wait_multiple - Prepare to wait on and enqueue several futexes * @vs: The list of futexes to wait on * @count: The number of objects * @to: Timeout before giving up and returning to userspace * * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function * sleeps on a group of futexes and returns on the first futex that is * wake, or after the timeout has elapsed. 
* * Return: * - >=0 - Hint to the futex that was awoken * - <0 - On error */ int futex_wait_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to) { int ret, hint = 0; if (to) hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); while (1) { ret = futex_wait_multiple_setup(vs, count, &hint); if (ret) { if (ret > 0) { /* A futex was woken during setup */ ret = hint; } return ret; } futex_sleep_multiple(vs, count, to); __set_current_state(TASK_RUNNING); ret = futex_unqueue_multiple(vs, count); if (ret >= 0) return ret; if (to && !to->task) return -ETIMEDOUT; else if (signal_pending(current)) return -ERESTARTSYS; /* * The final case is a spurious wakeup, for * which just retry. */ } } /** * futex_wait_setup() - Prepare to wait on a futex * @uaddr: the futex userspace address * @val: the expected value * @flags: futex flags (FLAGS_SHARED, etc.) * @q: the associated futex_q * @hb: storage for hash_bucket pointer to be returned to caller * * Setup the futex_q and locate the hash_bucket. Get the futex value and * compare it with the expected value. Handle atomic faults internally. * Return with the hb lock held on success, and unlocked on failure. * * Return: * - 0 - uaddr contains val and hb has been locked; * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked */ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) { u32 uval; int ret; /* * Access the page AFTER the hash-bucket is locked. * Order is important: * * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } * * The basic logical guarantee of a futex is that it blocks ONLY * if cond(var) is known to be true at the time of blocking, for * any cond. If we locked the hash-bucket after testing *uaddr, that * would open a race condition where we could block indefinitely with * cond(var) false, which would violate the guarantee. * * On the other hand, we insert q and release the hash-bucket only * after testing *uaddr. This guarantees that futex_wait() will NOT * absorb a wakeup if *uaddr does not match the desired values * while the syscall executes. */ retry: ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ); if (unlikely(ret != 0)) return ret; retry_private: *hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); if (ret) { futex_q_unlock(*hb); ret = get_user(uval, uaddr); if (ret) return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; goto retry; } if (uval != val) { futex_q_unlock(*hb); ret = -EWOULDBLOCK; } return ret; } int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, struct hrtimer_sleeper *to, u32 bitset) { struct futex_q q = futex_q_init; struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; q.bitset = bitset; retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q * is initialized. */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ if (!futex_unqueue(&q)) return 0; if (to && !to->task) return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the * victim of a spurious wakeup as well. 
*/ if (!signal_pending(current)) goto retry; return -ERESTARTSYS; } int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { struct hrtimer_sleeper timeout, *to; struct restart_block *restart; int ret; to = futex_setup_timer(abs_time, &timeout, flags, current->timer_slack_ns); ret = __futex_wait(uaddr, flags, val, to, bitset); /* No timeout, nothing to clean up. */ if (!to) return ret; hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); if (ret == -ERESTARTSYS) { restart = &current->restart_block; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; return set_restart_fn(restart, futex_wait_restart); } return ret; } static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = restart->futex.uaddr; ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { t = restart->futex.time; tp = &t; } restart->fn = do_no_restart_syscall; return (long)futex_wait(uaddr, restart->futex.flags, restart->futex.val, tp, restart->futex.bitset); }
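/*
 * Userspace counterpart (illustrative sketch, not kernel code) of the
 * protocol described in the ordering comment at the top of this file: the
 * waiter blocks only while the futex word still holds the value it saw, and
 * the waker changes the word *before* issuing FUTEX_WAKE.  The raw syscall is
 * used because glibc does not wrap futex(); SYS_futex is assumed to exist on
 * the build target.
 *
 * Build: cc -O2 -pthread demo_futex.c -o demo_futex
 */
#include <errno.h>
#include <linux/futex.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static atomic_int futex_word = 0;

static long futex(atomic_int *uaddr, int op, int val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void *waiter(void *arg)
{
	/* Block only while the word still holds the value we saw (0). */
	while (atomic_load(&futex_word) == 0) {
		long ret = futex(&futex_word, FUTEX_WAIT_PRIVATE, 0);
		if (ret == -1 && errno != EAGAIN && errno != EINTR)
			perror("FUTEX_WAIT");
	}
	printf("waiter: woken, value=%d\n", atomic_load(&futex_word));
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);

	/*
	 * Waker: change the value first, then wake -- mirroring the ordering
	 * requirement (store to *futex before the wake-side check).
	 */
	atomic_store(&futex_word, 1);
	futex(&futex_word, FUTEX_WAKE_PRIVATE, 1);

	pthread_join(t, NULL);
	return 0;
}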
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/hpfs/map.c
 *
 *  Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999
 *
 *  mapping structures to memory with some minimal checks
 */

#include "hpfs_fn.h"

__le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh)
{
	return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0);
}

__le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
			 struct quad_buffer_head *qbh, char *id)
{
	secno sec;
	__le32 *ret;
	unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
	if (hpfs_sb(s)->sb_chk)
		if (bmp_block >= n_bands) {
			hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
			return NULL;
		}
	sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
	if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
		hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
		return NULL;
	}
	ret = hpfs_map_4sectors(s, sec, qbh, 4);
	if (ret)
		hpfs_prefetch_bitmap(s, bmp_block + 1);
	return ret;
}

void hpfs_prefetch_bitmap(struct super_block *s, unsigned bmp_block)
{
	unsigned to_prefetch, next_prefetch;
	unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
	if (unlikely(bmp_block >= n_bands))
		return;
	to_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
	if (unlikely(bmp_block + 1 >= n_bands))
		next_prefetch = 0;
	else
		next_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block + 1]);
	hpfs_prefetch_sectors(s, to_prefetch, 4 + 4 * (to_prefetch + 4 == next_prefetch));
}

/*
 * Load first code page into kernel memory, return pointer to 256-byte array,
 * first 128 bytes are uppercasing table for chars 128-255, next 128 bytes are
 * lowercasing table
 */

unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
{
	struct buffer_head *bh;
	secno cpds;
	unsigned cpi;
	unsigned char *ptr;
	unsigned char *cp_table;
	int i;
	struct code_page_data *cpd;
	struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
	if (!cp) return NULL;
	if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
		pr_err("Code page directory magic doesn't match (magic = %08x)\n",
			le32_to_cpu(cp->magic));
		brelse(bh);
		return NULL;
	}
	if (!le32_to_cpu(cp->n_code_pages)) {
		pr_err("n_code_pages == 0\n");
		brelse(bh);
		return NULL;
	}
	cpds = le32_to_cpu(cp->array[0].code_page_data);
	cpi =
le16_to_cpu(cp->array[0].index); brelse(bh); if (cpi >= 3) { pr_err("Code page index out of array\n"); return NULL; } if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; if (le16_to_cpu(cpd->offs[cpi]) > 0x178) { pr_err("Code page index out of sector\n"); brelse(bh); return NULL; } ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6; if (!(cp_table = kmalloc(256, GFP_KERNEL))) { pr_err("out of memory for code page table\n"); brelse(bh); return NULL; } memcpy(cp_table, ptr, 128); brelse(bh); /* Try to build lowercasing table from uppercasing one */ for (i=128; i<256; i++) cp_table[i]=i; for (i=128; i<256; i++) if (cp_table[i-128]!=i && cp_table[i-128]>=128) cp_table[cp_table[i-128]] = i; return cp_table; } __le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) { struct buffer_head *bh; int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; int i; __le32 *b; if (!(b = kmalloc_array(n, 512, GFP_KERNEL))) { pr_err("can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;i<n;i++) { __le32 *d = hpfs_map_sector(s, bmp+i, &bh, n - i - 1); if (!d) { kfree(b); return NULL; } memcpy((char *)b + 512 * i, d, 512); brelse(bh); } return b; } void hpfs_load_hotfix_map(struct super_block *s, struct hpfs_spare_block *spareblock) { struct quad_buffer_head qbh; __le32 *directory; u32 n_hotfixes, n_used_hotfixes; unsigned i; n_hotfixes = le32_to_cpu(spareblock->n_spares); n_used_hotfixes = le32_to_cpu(spareblock->n_spares_used); if (n_hotfixes > 256 || n_used_hotfixes > n_hotfixes) { hpfs_error(s, "invalid number of hotfixes: %u, used: %u", n_hotfixes, n_used_hotfixes); return; } if (!(directory = hpfs_map_4sectors(s, le32_to_cpu(spareblock->hotfix_map), &qbh, 0))) { hpfs_error(s, "can't load hotfix map"); return; } for (i = 0; i < n_used_hotfixes; i++) { hpfs_sb(s)->hotfix_from[i] = le32_to_cpu(directory[i]); hpfs_sb(s)->hotfix_to[i] = le32_to_cpu(directory[n_hotfixes + i]); } hpfs_sb(s)->n_hotfixes = n_used_hotfixes; hpfs_brelse4(&qbh); } /* * Load fnode to memory */ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_head **bhp) { struct fnode *fnode; if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ino, 1, "fnode")) { return NULL; } if ((fnode = hpfs_map_sector(s, ino, bhp, FNODE_RD_AHEAD))) { if (hpfs_sb(s)->sb_chk) { struct extended_attribute *ea; struct extended_attribute *ea_end; if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) { hpfs_error(s, "bad magic on fnode %08lx", (unsigned long)ino); goto bail; } if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != (bp_internal(&fnode->btree) ? 12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 
8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); goto bail; } } if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) { hpfs_error(s, "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", (unsigned long)ino, le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); goto bail; } ea = fnode_ea(fnode); ea_end = fnode_end_ea(fnode); while (ea != ea_end) { if (ea > ea_end) { hpfs_error(s, "bad EA in fnode %08lx", (unsigned long)ino); goto bail; } ea = next_ea(ea); } } } return fnode; bail: brelse(*bhp); return NULL; } struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buffer_head **bhp) { struct anode *anode; if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL; if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD))) if (hpfs_sb(s)->sb_chk) { if (le32_to_cpu(anode->magic) != ANODE_MAGIC) { hpfs_error(s, "bad magic on anode %08x", ano); goto bail; } if (le32_to_cpu(anode->self) != ano) { hpfs_error(s, "self pointer invalid on anode %08x", ano); goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != (bp_internal(&anode->btree) ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } } return anode; bail: brelse(*bhp); return NULL; } /* * Load dnode to memory and do some checks */ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, struct quad_buffer_head *qbh) { struct dnode *dnode; if (hpfs_sb(s)->sb_chk) { if (hpfs_chk_sectors(s, secno, 4, "dnode")) return NULL; if (secno & 3) { hpfs_error(s, "dnode %08x not byte-aligned", secno); return NULL; } } if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD))) if (hpfs_sb(s)->sb_chk) { unsigned p, pp = 0; unsigned char *d = (unsigned char *)dnode; int b = 0; if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) { hpfs_error(s, "bad magic on dnode %08x", secno); goto bail; } if (le32_to_cpu(dnode->self) != secno) hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self)); /* Check dirents - bad dirents would cause infinite loops or shooting to memory */ if (le32_to_cpu(dnode->first_free) > 2048) { hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free)); goto bail; } for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) { struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p); if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) { hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) { if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & SB_RDONLY) goto ok; hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } ok: if (hpfs_sb(s)->sb_chk >= 2) b |= 1 << de->down; if (de->down) if (de_down_pointer(de) < 0x10) { hpfs_error(s, "bad down pointer in dnode %08x, dirent %03x, last %03x", secno, p, pp); goto bail; } pp = p; } if (p != le32_to_cpu(dnode->first_free)) { 
hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno); goto bail; } if (d[pp + 30] != 1 || d[pp + 31] != 255) { hpfs_error(s, "dnode %08x does not end with \\377 entry", secno); goto bail; } if (b == 3) pr_err("unbalanced dnode tree, dnode %08x; see hpfs.txt 4 more info\n", secno); } return dnode; bail: hpfs_brelse4(qbh); return NULL; } dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino) { struct buffer_head *bh; struct fnode *fnode; dnode_secno dno; fnode = hpfs_map_fnode(s, ino, &bh); if (!fnode) return 0; dno = le32_to_cpu(fnode->u.external[0].disk_secno); brelse(bh); return dno; }
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#ifndef __XFS_LINUX__
#define __XFS_LINUX__

#include <linux/types.h>
#include <linux/uuid.h>

/*
 * Kernel specific type declarations for XFS
 */

typedef __s64			xfs_off_t;	/* <file offset> type */
typedef unsigned long long	xfs_ino_t;	/* <inode> type */
typedef __s64			xfs_daddr_t;	/* <disk address> type */
typedef __u32			xfs_dev_t;
typedef __u32			xfs_nlink_t;

#include "xfs_types.h"

#include <linux/semaphore.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/file.h>
#include <linux/filelock.h>
#include <linux/swap.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/bitops.h>
#include <linux/major.h>
#include <linux/pagemap.h>
#include <linux/vfs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/proc_fs.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/delay.h>
#include <linux/log2.h>
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <linux/ratelimit.h>
#include <linux/rhashtable.h>
#include <linux/xattr.h>
#include <linux/mnt_idmapping.h>
#include <linux/debugfs.h>

#include <asm/page.h>
#include <asm/div64.h>
#include <asm/param.h>
#include <linux/uaccess.h>
#include <asm/byteorder.h>
#include <linux/unaligned.h>

#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
#include "xfs_iops.h"
#include "xfs_aops.h"
#include "xfs_super.h"
#include "xfs_cksum.h"
#include "xfs_buf.h"
#include "xfs_message.h"
#include "xfs_drain.h"
#include "xfs_hooks.h"

#ifdef __BIG_ENDIAN
#define XFS_NATIVE_HOST 1
#else
#undef XFS_NATIVE_HOST
#endif

#define irix_sgid_inherit	xfs_params.sgid_inherit.val
#define irix_symlink_mode	xfs_params.symlink_mode.val
#define xfs_panic_mask		xfs_params.panic_mask.val
#define xfs_error_level		xfs_params.error_level.val
#define xfs_syncd_centisecs	xfs_params.syncd_timer.val
#define xfs_stats_clear		xfs_params.stats_clear.val
#define xfs_inherit_sync	xfs_params.inherit_sync.val
#define xfs_inherit_nodump	xfs_params.inherit_nodump.val
#define xfs_inherit_noatime xfs_params.inherit_noatim.val #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_rotorstep xfs_params.rotorstep.val #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val #define xfs_blockgc_secs xfs_params.blockgc_timer.val #define current_cpu() (raw_smp_processor_id()) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) #define current_restore_flags_nested(sp, f) \ (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) #define NBBY 8 /* number of bits per byte */ /* * Size of block device i/o is parameterized here. * Currently the system supports page-sized i/o. */ #define BLKDEV_IOSHIFT PAGE_SHIFT #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) /* number of BB's per block device block */ #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define __return_address __builtin_return_address(0) /* * Return the address of a label. Use barrier() so that the optimizer * won't reorder code to refactor the error jumpouts into a single * return, which throws off the reported address. */ #define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) #define howmany(x, y) (((x)+((y)-1))/(y)) static inline void delay(long ticks) { schedule_timeout_uninterruptible(ticks); } /* * XFS wrapper structure for sysfs support. It depends on external data * structures and is embedded in various internal data structures to implement * the XFS sysfs object heirarchy. Define it here for broad access throughout * the codebase. */ struct xfs_kobj { struct kobject kobject; struct completion complete; }; struct xstats { struct xfsstats __percpu *xs_stats; struct xfs_kobj xs_kobj; }; extern struct xstats xfsstats; static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev) { return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); } static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev) { return sysv_encode_dev(dev); } /* * Various platform dependent calls that don't fit anywhere else */ #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() static inline uint64_t rounddown_64(uint64_t x, uint32_t y) { do_div(x, y); return x * y; } static inline uint64_t roundup_64(uint64_t x, uint32_t y) { x += y - 1; do_div(x, y); return x * y; } static inline uint64_t howmany_64(uint64_t x, uint32_t y) { x += y - 1; do_div(x, y); return x; } static inline bool isaligned_64(uint64_t x, uint32_t y) { return do_div(x, y) == 0; } /* If @b is a power of 2, return log2(b). Else return -1. */ static inline int8_t log2_if_power2(unsigned long b) { return is_power_of_2(b) ? ilog2(b) : -1; } /* If @b is a power of 2, return a mask of the lower bits, else return zero. */ static inline unsigned long long mask64_if_power2(unsigned long b) { return is_power_of_2(b) ? b - 1 : 0; } int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, char *data, enum req_op op); #define ASSERT_ALWAYS(expr) \ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) #ifdef DEBUG #define ASSERT(expr) \ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) #else /* !DEBUG */ #ifdef XFS_WARN #define ASSERT(expr) \ (likely(expr) ? 
(void)0 : asswarn(NULL, #expr, __FILE__, __LINE__)) #else /* !DEBUG && !XFS_WARN */ #define ASSERT(expr) ((void)0) #endif /* XFS_WARN */ #endif /* DEBUG */ #define XFS_IS_CORRUPT(mp, expr) \ (unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \ NULL, 0, __FILE__, __LINE__, \ __this_address), \ true : false) #define STATIC static noinline #ifdef CONFIG_XFS_RT /* * make sure we ignore the inode flag if the filesystem doesn't have a * configured realtime device. */ #define XFS_IS_REALTIME_INODE(ip) \ (((ip)->i_diflags & XFS_DIFLAG_REALTIME) && \ (ip)->i_mount->m_rtdev_targp) #define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0) #else #define XFS_IS_REALTIME_INODE(ip) (0) #define XFS_IS_REALTIME_MOUNT(mp) (0) #endif /* * Starting in Linux 4.15, the %p (raw pointer value) printk modifier * prints a hashed version of the pointer to avoid leaking kernel * pointers into dmesg. If we're trying to debug the kernel we want the * raw values, so override this behavior as best we can. */ #ifdef DEBUG # define PTR_FMT "%px" #else # define PTR_FMT "%p" #endif /* * Helper for IO routines to grab backing pages from allocated kernel memory. */ static inline struct page * kmem_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); return virt_to_page(addr); } #endif /* __XFS_LINUX__ */
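/*
 * Userspace sketch (not XFS code) of the 64-bit rounding helpers defined
 * earlier in this header: roundup_64(), rounddown_64() and howmany_64() are
 * plain divide-and-multiply on a (value, divisor) pair; the kernel versions
 * use do_div() only because 64-by-32 division needs a helper on 32-bit
 * platforms.  The byte/block values below are made up for the example.
 *
 * Build: cc -O2 demo_round64.c -o demo_round64
 */
#include <inttypes.h>
#include <stdio.h>

static uint64_t rounddown_64(uint64_t x, uint32_t y) { return (x / y) * y; }
static uint64_t roundup_64(uint64_t x, uint32_t y)   { return ((x + y - 1) / y) * y; }
static uint64_t howmany_64(uint64_t x, uint32_t y)   { return (x + y - 1) / y; }

int main(void)
{
	uint64_t bytes = 1000000;	/* e.g. a byte count   */
	uint32_t bsize = 4096;		/* e.g. the block size */

	printf("rounddown: %" PRIu64 "\n", rounddown_64(bytes, bsize)); /* 999424  */
	printf("roundup:   %" PRIu64 "\n", roundup_64(bytes, bsize));   /* 1003520 */
	printf("howmany:   %" PRIu64 "\n", howmany_64(bytes, bsize));   /* 245     */
	return 0;
}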
// SPDX-License-Identifier: GPL-2.0-only
/*
 * ratelimit.c - Do something with rate limit.
 *
 * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
 *
 * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
 * parameter. Now every user can use their own standalone ratelimit_state.
 */

#include <linux/ratelimit.h>
#include <linux/jiffies.h>
#include <linux/export.h>

/*
 * __ratelimit - rate limiting
 * @rs: ratelimit_state data
 * @func: name of calling function
 *
 * This enforces a rate limit: not more than @rs->burst callbacks
 * in every @rs->interval
 *
 * RETURNS:
 * 0 means callbacks will be suppressed.
 * 1 means go ahead and do it.
 */
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
	/* Paired with WRITE_ONCE() in .proc_handler().
	 * Changing two values separately could be inconsistent
	 * and some message could be lost.  (See: net_ratelimit_state).
	 */
	int interval = READ_ONCE(rs->interval);
	int burst = READ_ONCE(rs->burst);
	unsigned long flags;
	int ret;

	if (!interval)
		return 1;

	/*
	 * If we contend on this state's lock then almost
	 * by definition we are too busy to print a message,
	 * in addition to the one that will be printed by
	 * the entity that is holding the lock already:
	 */
	if (!raw_spin_trylock_irqsave(&rs->lock, flags))
		return 0;

	if (!rs->begin)
		rs->begin = jiffies;

	if (time_is_before_jiffies(rs->begin + interval)) {
		if (rs->missed) {
			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
				printk_deferred(KERN_WARNING
						"%s: %d callbacks suppressed\n",
						func, rs->missed);
				rs->missed = 0;
			}
		}
		rs->begin   = jiffies;
		rs->printed = 0;
	}
	if (burst && burst > rs->printed) {
		rs->printed++;
		ret = 1;
	} else {
		rs->missed++;
		ret = 0;
	}
	raw_spin_unlock_irqrestore(&rs->lock, flags);

	return ret;
}
EXPORT_SYMBOL(___ratelimit);
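/*
 * Minimal userspace model (not the kernel code) of the window/burst logic in
 * ___ratelimit() above: within each interval at most 'burst' events pass;
 * everything else is counted in 'missed' and reported when the window rolls
 * over.  time(NULL) stands in for jiffies and no locking is modelled.
 *
 * Build: cc -O2 demo_ratelimit.c -o demo_ratelimit
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct demo_ratelimit {
	int interval;	/* seconds per window */
	int burst;	/* events allowed per window */
	time_t begin;
	int printed;
	int missed;
};

static int demo_ratelimit_ok(struct demo_ratelimit *rs)
{
	time_t now = time(NULL);

	if (!rs->begin)
		rs->begin = now;

	if (now - rs->begin >= rs->interval) {	/* window expired: reset */
		if (rs->missed)
			printf("%d callbacks suppressed\n", rs->missed);
		rs->begin = now;
		rs->printed = 0;
		rs->missed = 0;
	}

	if (rs->printed < rs->burst) {
		rs->printed++;
		return 1;
	}
	rs->missed++;
	return 0;
}

int main(void)
{
	struct demo_ratelimit rs = { .interval = 1, .burst = 3 };

	for (int i = 0; i < 10; i++)
		if (demo_ratelimit_ok(&rs))
			printf("event %d allowed\n", i);	/* events 0..2 */

	sleep(1);			/* next window */
	if (demo_ratelimit_ok(&rs))
		printf("allowed again after the interval\n");
	return 0;
}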
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/regset.h>

static int __regset_get(struct task_struct *target,
			const struct user_regset *regset,
			unsigned int size,
			void **data)
{
	void *p = *data, *to_free = NULL;
	int res;

	if (!regset->regset_get)
		return -EOPNOTSUPP;
	if (size > regset->n * regset->size)
		size = regset->n * regset->size;
	if (!p) {
		to_free = p = kvzalloc(size, GFP_KERNEL);
		if (!p)
			return -ENOMEM;
	}
	res = regset->regset_get(target, regset,
				 (struct membuf){.p = p, .left = size});
	if (res < 0) {
		kvfree(to_free);
		return res;
	}
	*data = p;
	return size - res;
}

int regset_get(struct task_struct *target,
	       const struct user_regset *regset,
	       unsigned int size, void *data)
{
	return __regset_get(target, regset, size, &data);
}
EXPORT_SYMBOL(regset_get);

int regset_get_alloc(struct task_struct *target,
		     const struct user_regset *regset,
		     unsigned int size,
		     void **data)
{
	*data = NULL;
	return __regset_get(target, regset, size, data);
}
EXPORT_SYMBOL(regset_get_alloc);

/**
 * copy_regset_to_user - fetch a thread's user_regset data into user memory
 * @target:	thread to be examined
 * @view:	&struct user_regset_view describing user thread machine state
 * @setno:	index in @view->regsets
 * @offset:	offset into the regset data, in bytes
 * @size:	amount of data to copy, in bytes
 * @data:	user-mode pointer to copy into
 */
int copy_regset_to_user(struct task_struct *target,
			const struct user_regset_view *view,
			unsigned int setno,
			unsigned int offset, unsigned int size,
			void __user *data)
{
	const struct user_regset *regset = &view->regsets[setno];
	void *buf;
	int ret;

	ret = regset_get_alloc(target, regset, size, &buf);
	if (ret > 0)
		ret = copy_to_user(data, buf, ret) ? -EFAULT : 0;

	kvfree(buf);
	return ret;
}
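/*
 * Userspace view (illustrative sketch) of what the regset helpers above back:
 * PTRACE_GETREGSET fills a caller-supplied buffer with one register set of a
 * traced task, which is serviced through copy_regset_to_user().  This demo
 * traces its own child and reports how many bytes of the NT_PRSTATUS
 * (general-purpose register) set it received; struct user_regs_struct is
 * arch-specific (x86-64/arm64 assumed here).
 *
 * Build: cc -O2 demo_regset.c -o demo_regset
 */
#include <elf.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* wait for the parent */
		_exit(0);
	}

	waitpid(pid, NULL, 0);			/* child is now stopped */

	struct user_regs_struct regs;
	struct iovec iov = { .iov_base = &regs, .iov_len = sizeof(regs) };

	if (ptrace(PTRACE_GETREGSET, pid, (void *) NT_PRSTATUS, &iov) < 0)
		perror("PTRACE_GETREGSET");
	else
		printf("got %zu bytes of NT_PRSTATUS\n", iov.iov_len);

	kill(pid, SIGKILL);			/* clean up the child */
	waitpid(pid, NULL, 0);
	return 0;
}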
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/signal.h>
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/netfs.h>
#include <trace/events/netfs.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"
#include "metric.h"
#include "crypto.h"
#include <linux/ceph/osd_client.h>
#include <linux/ceph/striper.h>

/*
 * Ceph address space ops.
 *
 * There are a few funny things going on here.
 *
 * The page->private field is used to reference a struct
 * ceph_snap_context for _every_ dirty page.  This indicates which
 * snapshot the page was logically dirtied in, and thus which snap
 * context needs to be associated with the osd write during writeback.
 *
 * Similarly, struct ceph_inode_info maintains a set of counters to
 * count dirty pages on the inode.  In the absence of snapshots,
 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
 *
 * When a snapshot is taken (that is, when the client receives
 * notification that a snapshot was taken), each inode with caps and
 * with dirty pages (dirty pages implies there is a cap) gets a new
 * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
 * order, new snaps go to the tail).  The i_wrbuffer_ref_head count is
 * moved to capsnap->dirty. (Unless a sync write is currently in
 * progress.  In that case, the capsnap is said to be "pending", new
 * writes cannot start, and the capsnap isn't "finalized" until the
 * write completes (or fails) and a final size/mtime for the inode for
 * that snap can be settled upon.)  i_wrbuffer_ref_head is reset to 0.
 *
 * On writeback, we must submit writes to the osd IN SNAP ORDER.  So,
 * we look for the first capsnap in i_cap_snaps and write out pages in
 * that snap context _only_.  Then we move on to the next capsnap,
 * eventually reaching the "live" or "head" context (i.e., pages that
 * are not yet snapped) and are writing the most recently dirtied
 * pages.
 *
 * Invalidate and so forth must take care to ensure the dirty page
 * accounting is preserved.
 */

#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
#define CONGESTION_OFF_THRESH(congestion_kb)				\
	(CONGESTION_ON_THRESH(congestion_kb) -				\
	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))

static int ceph_netfs_check_write_begin(struct file *file, loff_t pos,
					unsigned int len, struct folio **foliop,
					void **_fsdata);

static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
	if (PagePrivate(page))
		return (void *)page->private;
	return NULL;
}

/*
 * Dirty a page.  Optimistically adjust accounting, on the assumption
 * that we won't race with invalidate.  If we do, readjust.
*/ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) { struct inode *inode = mapping->host; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci; struct ceph_snap_context *snapc; if (folio_test_dirty(folio)) { doutc(cl, "%llx.%llx %p idx %lu -- already dirty\n", ceph_vinop(inode), folio, folio->index); VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } ci = ceph_inode(inode); /* dirty the head */ spin_lock(&ci->i_ceph_lock); if (__ceph_have_pending_cap_snap(ci)) { struct ceph_cap_snap *capsnap = list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap, ci_item); snapc = ceph_get_snap_context(capsnap->context); capsnap->dirty_pages++; } else { BUG_ON(!ci->i_head_snapc); snapc = ceph_get_snap_context(ci->i_head_snapc); ++ci->i_wrbuffer_ref_head; } if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; doutc(cl, "%llx.%llx %p idx %lu head %d/%d -> %d/%d " "snapc %p seq %lld (%d snaps)\n", ceph_vinop(inode), folio, folio->index, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, snapc, snapc->seq, snapc->num_snaps); spin_unlock(&ci->i_ceph_lock); /* * Reference snap context in folio->private. Also set * PagePrivate so that we get invalidate_folio callback. */ VM_WARN_ON_FOLIO(folio->private, folio); folio_attach_private(folio, snapc); return ceph_fscache_dirty_folio(mapping, folio); } /* * If we are truncating the full folio (i.e. offset == 0), adjust the * dirty folio counters appropriately. Only called if there is private * data on the folio. */ static void ceph_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct inode *inode = folio->mapping->host; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; if (offset != 0 || length != folio_size(folio)) { doutc(cl, "%llx.%llx idx %lu partial dirty page %zu~%zu\n", ceph_vinop(inode), folio->index, offset, length); return; } WARN_ON(!folio_test_locked(folio)); if (folio_test_private(folio)) { doutc(cl, "%llx.%llx idx %lu full dirty page\n", ceph_vinop(inode), folio->index); snapc = folio_detach_private(folio); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); } netfs_invalidate_folio(folio, offset, length); } static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) { struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; unsigned long max_pages = inode->i_sb->s_bdi->ra_pages; loff_t end = rreq->start + rreq->len, new_end; struct ceph_netfs_request_data *priv = rreq->netfs_priv; unsigned long max_len; u32 blockoff; if (priv) { /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */ if (priv->file_ra_disabled) max_pages = 0; else max_pages = priv->file_ra_pages; } /* Readahead is disabled */ if (!max_pages) return; max_len = max_pages << PAGE_SHIFT; /* * Try to expand the length forward by rounding up it to the next * block, but do not exceed the file size, unless the original * request already exceeds it. 
*/ new_end = umin(round_up(end, lo->stripe_unit), rreq->i_size); if (new_end > end && new_end <= rreq->start + max_len) rreq->len = new_end - rreq->start; /* Try to expand the start downward */ div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); if (rreq->len + blockoff <= max_len) { rreq->start -= blockoff; rreq->len += blockoff; } } static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; int err = req->r_result; bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, osd_data->length, err); doutc(cl, "result %d subreq->len=%zu i_size=%lld\n", req->r_result, subreq->len, i_size_read(req->r_inode)); /* no object means success but no data */ if (err == -ENOENT) { __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); err = 0; } else if (err == -EBLOCKLISTED) { fsc->blocklisted = true; } if (err >= 0) { if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); if (err < subreq->len && subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { err = ceph_fscrypt_decrypt_extents(inode, osd_data->pages, subreq->start, op->extent.sparse_ext, op->extent.sparse_ext_cnt); if (err > subreq->len) err = subreq->len; } if (err > 0) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); } if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { ceph_put_page_vector(osd_data->pages, calc_pages_for(osd_data->alignment, osd_data->length), false); } if (err > 0) { subreq->transferred = err; err = 0; } subreq->error = err; trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress); netfs_read_subreq_terminated(subreq); iput(req->r_inode); ceph_dec_osd_stopping_blocker(fsc->mdsc); } static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_mds_reply_info_parsed *rinfo; struct ceph_mds_reply_info_in *iinfo; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci = ceph_inode(inode); ssize_t err = 0; size_t len; int mode; if (rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); if (subreq->start >= inode->i_size) goto out; /* We need to fetch the inline data. 
*/ mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; } req->r_ino1 = ci->i_vino; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); req->r_num_caps = 2; trace_netfs_sreq(subreq, netfs_sreq_trace_submit); err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) goto out; rinfo = &req->r_reply_info; iinfo = &rinfo->targeti; if (iinfo->inline_version == CEPH_INLINE_NONE) { /* The data got uninlined */ ceph_mdsc_put_request(req); return false; } len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter); if (err == 0) { err = -EFAULT; } else { subreq->transferred += err; err = 0; } ceph_mdsc_put_request(req); out: subreq->error = err; trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress); netfs_read_subreq_terminated(subreq); return true; } static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); u64 objno, objoff; u32 xlen; /* Truncate the extent at the end of the current block */ ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, &objno, &objoff, &xlen); rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize); return 0; } static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); int err; u64 len; bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 off = subreq->start; int extent_cnt; if (ceph_inode_is_shutdown(inode)) { err = -EIO; goto out; } if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) return; // TODO: This rounding here is slightly dodgy. It *should* work, for // now, as the cache only deals in blocks that are a multiple of // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to // happen is for the fscrypt driving to be moved into netfslib and the // data in the cache also to be stored encrypted. len = subreq->len; ceph_fscrypt_adjust_off_and_len(inode, &off, &len); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; goto out; } if (sparse) { extent_cnt = __ceph_sparse_read_ext_count(inode, len); err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt); if (err) goto out; } doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", ceph_vinop(inode), subreq->start, subreq->len, len); /* * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for * encrypted inodes. We'd need infrastructure that handles an iov_iter * instead of page arrays, and we don't have that as of yet. Once the * dust settles on the write helpers and encrypt/decrypt routines for * netfs, we should be able to rework this. 
*/ if (IS_ENCRYPTED(inode)) { struct page **pages; size_t page_off; err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off); if (err < 0) { doutc(cl, "%llx.%llx failed to allocate pages, %d\n", ceph_vinop(inode), err); goto out; } /* should always give us a page-aligned read */ WARN_ON_ONCE(page_off); len = err; err = 0; osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false); } else { osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter); } if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { err = -EIO; goto out; } req->r_callback = finish_netfs_read; req->r_priv = subreq; req->r_inode = inode; ihold(inode); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); ceph_osdc_start_request(req->r_osdc, req); out: ceph_osdc_put_request(req); if (err) { subreq->error = err; netfs_read_subreq_terminated(subreq); } doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); } static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = ceph_inode_to_client(inode); int got = 0, want = CEPH_CAP_FILE_CACHE; struct ceph_netfs_request_data *priv; int ret = 0; /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); if (rreq->origin != NETFS_READAHEAD) return 0; priv = kzalloc(sizeof(*priv), GFP_NOFS); if (!priv) return -ENOMEM; if (file) { struct ceph_rw_context *rw_ctx; struct ceph_file_info *fi = file->private_data; priv->file_ra_pages = file->f_ra.ra_pages; priv->file_ra_disabled = file->f_mode & FMODE_RANDOM; rw_ctx = ceph_find_rw_context(fi); if (rw_ctx) { rreq->netfs_priv = priv; return 0; } } /* * readahead callers do not necessarily hold Fcb caps * (e.g. fadvise, madvise). 
*/ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { doutc(cl, "%llx.%llx, error getting cap\n", ceph_vinop(inode)); goto out; } if (!(got & want)) { doutc(cl, "%llx.%llx, no cache cap\n", ceph_vinop(inode)); ret = -EACCES; goto out; } if (ret == 0) { ret = -EACCES; goto out; } priv->caps = got; rreq->netfs_priv = priv; rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize; out: if (ret < 0) { if (got) ceph_put_cap_refs(ceph_inode(inode), got); kfree(priv); } return ret; } static void ceph_netfs_free_request(struct netfs_io_request *rreq) { struct ceph_netfs_request_data *priv = rreq->netfs_priv; if (!priv) return; if (priv->caps) ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps); kfree(priv); rreq->netfs_priv = NULL; } const struct netfs_request_ops ceph_netfs_ops = { .init_request = ceph_init_request, .free_request = ceph_netfs_free_request, .prepare_read = ceph_netfs_prepare_read, .issue_read = ceph_netfs_issue_read, .expand_readahead = ceph_netfs_expand_readahead, .check_write_begin = ceph_netfs_check_write_begin, }; #ifdef CONFIG_CEPH_FSCACHE static void ceph_set_page_fscache(struct page *page) { folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ } static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; if (IS_ERR_VALUE(error) && error != -ENOBUFS) ceph_fscache_invalidate(inode, false); } static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { struct ceph_inode_info *ci = ceph_inode(inode); struct fscache_cookie *cookie = ceph_fscache_cookie(ci); fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), ceph_fscache_write_terminated, inode, true, caching); } #else static inline void ceph_set_page_fscache(struct page *page) { } static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } #endif /* CONFIG_CEPH_FSCACHE */ struct ceph_writeback_ctl { loff_t i_size; u64 truncate_size; u32 truncate_seq; bool size_stable; bool head_snapc; }; /* * Get ref for the oldest snapc for an inode with dirty data... that is, the * only snap context we are allowed to write back. */ static struct ceph_snap_context * get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, struct ceph_snap_context *page_snapc) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *snapc = NULL; struct ceph_cap_snap *capsnap = NULL; spin_lock(&ci->i_ceph_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { doutc(cl, " capsnap %p snapc %p has %d dirty pages\n", capsnap, capsnap->context, capsnap->dirty_pages); if (!capsnap->dirty_pages) continue; /* get i_size, truncate_{seq,size} for page_snapc? 
*/ if (snapc && capsnap->context != page_snapc) continue; if (ctl) { if (capsnap->writing) { ctl->i_size = i_size_read(inode); ctl->size_stable = false; } else { ctl->i_size = capsnap->size; ctl->size_stable = true; } ctl->truncate_size = capsnap->truncate_size; ctl->truncate_seq = capsnap->truncate_seq; ctl->head_snapc = false; } if (snapc) break; snapc = ceph_get_snap_context(capsnap->context); if (!page_snapc || page_snapc == snapc || page_snapc->seq > snapc->seq) break; } if (!snapc && ci->i_wrbuffer_ref_head) { snapc = ceph_get_snap_context(ci->i_head_snapc); doutc(cl, " head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); if (ctl) { ctl->i_size = i_size_read(inode); ctl->truncate_size = ci->i_truncate_size; ctl->truncate_seq = ci->i_truncate_seq; ctl->size_stable = false; ctl->head_snapc = true; } } spin_unlock(&ci->i_ceph_lock); return snapc; } static u64 get_writepages_data_length(struct inode *inode, struct page *page, u64 start) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; struct ceph_cap_snap *capsnap = NULL; u64 end = i_size_read(inode); u64 ret; snapc = page_snap_context(ceph_fscrypt_pagecache_page(page)); if (snapc != ci->i_head_snapc) { bool found = false; spin_lock(&ci->i_ceph_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { if (capsnap->context == snapc) { if (!capsnap->writing) end = capsnap->size; found = true; break; } } spin_unlock(&ci->i_ceph_lock); WARN_ON(!found); } if (end > ceph_fscrypt_page_offset(page) + thp_size(page)) end = ceph_fscrypt_page_offset(page) + thp_size(page); ret = end > start ? end - start : 0; if (ret && fscrypt_is_bounce_page(page)) ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE); return ret; } /* * Write a single page, but leave the page locked. * * If we get a write error, mark the mapping for error, but still adjust the * dirty page accounting (i.e., page is no longer dirty). */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { struct folio *folio = page_folio(page); struct inode *inode = page->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); int err; loff_t len = thp_size(page); loff_t wlen; struct ceph_writeback_ctl ceph_wbc; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; bool caching = ceph_is_cache_enabled(inode); struct page *bounce_page = NULL; doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page, page->index); if (ceph_inode_is_shutdown(inode)) return -EIO; /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode), page); return 0; } oldest = get_oldest_context(inode, &ceph_wbc, snapc); if (snapc->seq > oldest->seq) { doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n", ceph_vinop(inode), page, snapc); /* we should only noop if called by kswapd */ WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); redirty_page_for_writepage(wbc, page); return 0; } ceph_put_snap_context(oldest); /* is this a partial page at end of file? 
*/ if (page_off >= ceph_wbc.i_size) { doutc(cl, "%llx.%llx folio at %lu beyond eof %llu\n", ceph_vinop(inode), folio->index, ceph_wbc.i_size); folio_invalidate(folio, 0, folio_size(folio)); return 0; } if (ceph_wbc.i_size < page_off + len) len = ceph_wbc.i_size - page_off; wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n", ceph_vinop(inode), page, page->index, page_off, wlen, snapc, snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) fsc->write_congested = true; req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, true); if (IS_ERR(req)) { redirty_page_for_writepage(wbc, page); return PTR_ERR(req); } if (wlen < len) len = wlen; set_page_writeback(page); if (caching) ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { bounce_page = fscrypt_encrypt_pagecache_blocks(page, CEPH_FSCRYPT_BLOCK_SIZE, 0, GFP_NOFS); if (IS_ERR(bounce_page)) { redirty_page_for_writepage(wbc, page); end_page_writeback(page); ceph_osdc_put_request(req); return PTR_ERR(bounce_page); } } /* it may be a short write due to an object boundary */ WARN_ON_ONCE(len > thp_size(page)); osd_req_op_extent_osd_data_pages(req, 0, bounce_page ? &bounce_page : &page, wlen, 0, false, false); doutc(cl, "%llx.%llx %llu~%llu (%llu bytes, %sencrypted)\n", ceph_vinop(inode), page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not "); req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(osdc, req); err = ceph_osdc_wait_request(osdc, req); ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, err); fscrypt_free_bounce_page(bounce_page); ceph_osdc_put_request(req); if (err == 0) err = len; if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) wbc = &tmp_wbc; if (err == -ERESTARTSYS) { /* killed by SIGKILL */ doutc(cl, "%llx.%llx interrupted page %p\n", ceph_vinop(inode), page); redirty_page_for_writepage(wbc, page); end_page_writeback(page); return err; } if (err == -EBLOCKLISTED) fsc->blocklisted = true; doutc(cl, "%llx.%llx setting page/mapping error %d %p\n", ceph_vinop(inode), err, page); mapping_set_error(&inode->i_data, err); wbc->pages_skipped++; } else { doutc(cl, "%llx.%llx cleaned page %p\n", ceph_vinop(inode), page); err = 0; /* vfs expects us to return 0 */ } oldest = detach_page_private(page); WARN_ON_ONCE(oldest != snapc); end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ if (atomic_long_dec_return(&fsc->writeback_count) < CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) fsc->write_congested = false; return err; } static int ceph_writepage(struct page *page, struct writeback_control *wbc) { int err; struct inode *inode = page->mapping->host; BUG_ON(!inode); ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && ceph_inode_to_fs_client(inode)->write_congested) { redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. 
return 0 * to prevent caller from setting mapping/page error */ err = 0; } unlock_page(page); iput(inode); return err; } /* * async writeback completion handler. * * If we get an error, set the mapping error bit, but not the individual * page error bits. */ static void writepages_finish(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_osd_data *osd_data; struct page *page; int num_pages, total_pages = 0; int i, j; int rc = req->r_result; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); unsigned int len = 0; bool remove_page; doutc(cl, "%llx.%llx rc %d\n", ceph_vinop(inode), rc); if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); if (rc == -EBLOCKLISTED) fsc->blocklisted = true; } else { ceph_clear_error_write(ci); } /* * We lost the cache cap, need to truncate the page before * it is unlocked, otherwise we'd truncate it later in the * page truncation thread, possibly losing some data that * raced its way in */ remove_page = !(ceph_caps_issued(ci) & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)); /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { pr_warn_client(cl, "%llx.%llx incorrect op %d req %p index %d tid %llu\n", ceph_vinop(inode), req->r_ops[i].op, req, i, req->r_tid); break; } osd_data = osd_req_op_extent_osd_data(req, i); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); len += osd_data->length; num_pages = calc_pages_for((u64)osd_data->alignment, (u64)osd_data->length); total_pages += num_pages; for (j = 0; j < num_pages; j++) { page = osd_data->pages[j]; if (fscrypt_is_bounce_page(page)) { page = fscrypt_pagecache_page(page); fscrypt_free_bounce_page(osd_data->pages[j]); osd_data->pages[j] = page; } BUG_ON(!page); WARN_ON(!PageUptodate(page)); if (atomic_long_dec_return(&fsc->writeback_count) < CONGESTION_OFF_THRESH( fsc->mount_options->congestion_kb)) fsc->write_congested = false; ceph_put_snap_context(detach_page_private(page)); end_page_writeback(page); doutc(cl, "unlocking %p\n", page); if (remove_page) generic_error_remove_folio(inode->i_mapping, page_folio(page)); unlock_page(page); } doutc(cl, "%llx.%llx wrote %llu bytes cleaned %d pages\n", ceph_vinop(inode), osd_data->length, rc >= 0 ? 
num_pages : 0); release_pages(osd_data->pages, num_pages); } ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, rc); ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc); osd_data = osd_req_op_extent_osd_data(req, 0); if (osd_data->pages_from_pool) mempool_free(osd_data->pages, ceph_wb_pagevec_pool); else kfree(osd_data->pages); ceph_osdc_put_request(req); ceph_dec_osd_stopping_blocker(fsc->mdsc); } /* * initiate async writeback */ static int ceph_writepages_start(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_vino vino = ceph_vino(inode); pgoff_t index, start_index, end = -1; struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; struct folio_batch fbatch; int rc = 0; unsigned int wsize = i_blocksize(inode); struct ceph_osd_request *req = NULL; struct ceph_writeback_ctl ceph_wbc; bool should_loop, range_whole = false; bool done = false; bool caching = ceph_is_cache_enabled(inode); xa_mark_t tag; if (wbc->sync_mode == WB_SYNC_NONE && fsc->write_congested) return 0; doutc(cl, "%llx.%llx (mode=%s)\n", ceph_vinop(inode), wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); if (ceph_inode_is_shutdown(inode)) { if (ci->i_wrbuffer_ref > 0) { pr_warn_ratelimited_client(cl, "%llx.%llx %lld forced umount\n", ceph_vinop(inode), ceph_ino(inode)); } mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ } if (fsc->mount_options->wsize < wsize) wsize = fsc->mount_options->wsize; folio_batch_init(&fbatch); start_index = wbc->range_cyclic ? mapping->writeback_index : 0; index = start_index; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) { tag = PAGECACHE_TAG_TOWRITE; } else { tag = PAGECACHE_TAG_DIRTY; } retry: /* find oldest snap context with dirty data */ snapc = get_oldest_context(inode, &ceph_wbc, NULL); if (!snapc) { /* hmm, why does writepages get called when there is no dirty data? */ doutc(cl, " no snap context with dirty data?\n"); goto out; } doutc(cl, " oldest snapc is %p seq %lld (%d snaps)\n", snapc, snapc->seq, snapc->num_snaps); should_loop = false; if (ceph_wbc.head_snapc && snapc != last_snapc) { /* where to start/end? */ if (wbc->range_cyclic) { index = start_index; end = -1; if (index > 0) should_loop = true; doutc(cl, " cyclic, start at %lu\n", index); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = true; doutc(cl, " not cyclic, %lu to %lu\n", index, end); } } else if (!ceph_wbc.head_snapc) { /* Do not respect wbc->range_{start,end}. Dirty pages * in that range can be associated with newer snapc. 
* They are not writeable until we write all dirty pages * associated with 'snapc' get written */ if (index > 0) should_loop = true; doutc(cl, " non-head snapc, range whole\n"); } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); ceph_put_snap_context(last_snapc); last_snapc = snapc; while (!done && index <= end) { int num_ops = 0, op_idx; unsigned i, nr_folios, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; struct page *page; pgoff_t strip_unit_end = 0; u64 offset = 0, len = 0; bool from_pool = false; max_pages = wsize >> PAGE_SHIFT; get_more_pages: nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); doutc(cl, "pagevec_lookup_range_tag got %d\n", nr_folios); if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { struct folio *folio = fbatch.folios[i]; page = &folio->page; doutc(cl, "? %p idx %lu\n", page, page->index); if (locked_pages == 0) lock_page(page); /* first page */ else if (!trylock_page(page)) break; /* only dirty pages, or our accounting breaks */ if (unlikely(!PageDirty(page)) || unlikely(page->mapping != mapping)) { doutc(cl, "!dirty or !mapping %p\n", page); unlock_page(page); continue; } /* only if matching snap context */ pgsnapc = page_snap_context(page); if (pgsnapc != snapc) { doutc(cl, "page snapc %p %lld != oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); if (!should_loop && !ceph_wbc.head_snapc && wbc->sync_mode != WB_SYNC_NONE) should_loop = true; unlock_page(page); continue; } if (page_offset(page) >= ceph_wbc.i_size) { doutc(cl, "folio at %lu beyond eof %llu\n", folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || folio_pos(folio) >= i_size_read(inode)) && folio_clear_dirty_for_io(folio)) folio_invalidate(folio, 0, folio_size(folio)); folio_unlock(folio); continue; } if (strip_unit_end && (page->index > strip_unit_end)) { doutc(cl, "end of strip unit %p\n", page); unlock_page(page); break; } if (folio_test_writeback(folio) || folio_test_private_2(folio) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", folio); folio_unlock(folio); continue; } doutc(cl, "waiting on writeback %p\n", folio); folio_wait_writeback(folio); folio_wait_private_2(folio); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { doutc(cl, "%p !clear_page_dirty_for_io\n", page); unlock_page(page); continue; } /* * We have something to write. If this is * the first locked page this time through, * calculate max possinle write size and * allocate a page array */ if (locked_pages == 0) { u64 objnum; u64 objoff; u32 xlen; /* prepare async write request */ offset = (u64)page_offset(page); ceph_calc_file_object_mapping(&ci->i_layout, offset, wsize, &objnum, &objoff, &xlen); len = xlen; num_ops = 1; strip_unit_end = page->index + ((len - 1) >> PAGE_SHIFT); BUG_ON(pages); max_pages = calc_pages_for(0, (u64)len); pages = kmalloc_array(max_pages, sizeof(*pages), GFP_NOFS); if (!pages) { from_pool = true; pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } len = 0; } else if (page->index != (offset + len) >> PAGE_SHIFT) { if (num_ops >= (from_pool ? 
CEPH_OSD_SLAB_OPS : CEPH_OSD_MAX_OPS)) { redirty_page_for_writepage(wbc, page); unlock_page(page); break; } num_ops++; offset = (u64)page_offset(page); len = 0; } /* note position of first page in fbatch */ doutc(cl, "%llx.%llx will write page %p idx %lu\n", ceph_vinop(inode), page, page->index); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( fsc->mount_options->congestion_kb)) fsc->write_congested = true; if (IS_ENCRYPTED(inode)) { pages[locked_pages] = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE, 0, locked_pages ? GFP_NOWAIT : GFP_NOFS); if (IS_ERR(pages[locked_pages])) { if (PTR_ERR(pages[locked_pages]) == -EINVAL) pr_err_client(cl, "inode->i_blkbits=%hhu\n", inode->i_blkbits); /* better not fail on first page! */ BUG_ON(locked_pages == 0); pages[locked_pages] = NULL; redirty_page_for_writepage(wbc, page); unlock_page(page); break; } ++locked_pages; } else { pages[locked_pages++] = page; } fbatch.folios[i] = NULL; len += thp_size(page); } /* did we get anything? */ if (!locked_pages) goto release_folios; if (i) { unsigned j, n = 0; /* shift unused page to beginning of fbatch */ for (j = 0; j < nr_folios; j++) { if (!fbatch.folios[j]) continue; if (n < j) fbatch.folios[n] = fbatch.folios[j]; n++; } fbatch.nr = n; if (nr_folios && i == nr_folios && locked_pages < max_pages) { doutc(cl, "reached end fbatch, trying for more\n"); folio_batch_release(&fbatch); goto get_more_pages; } } new_request: offset = ceph_fscrypt_page_offset(pages[0]); len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, offset, &len, 0, num_ops, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, false); if (IS_ERR(req)) { req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, offset, &len, 0, min(num_ops, CEPH_OSD_SLAB_OPS), CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, true); BUG_ON(IS_ERR(req)); } BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) + thp_size(pages[locked_pages - 1]) - offset); if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { rc = -EIO; goto release_folios; } req->r_callback = writepages_finish; req->r_inode = inode; /* Format the osd request message and submit the write */ len = 0; data_pages = pages; op_idx = 0; for (i = 0; i < locked_pages; i++) { struct page *page = ceph_fscrypt_pagecache_page(pages[i]); u64 cur_offset = page_offset(page); /* * Discontinuity in page range? Ceph can handle that by just passing * multiple extents in the write op. */ if (offset + len != cur_offset) { /* If it's full, stop here */ if (op_idx + 1 == req->r_num_ops) break; /* Kick off an fscache write with what we have so far. 
*/ ceph_fscache_write_to_cache(inode, offset, len, caching); /* Start a new extent */ osd_req_op_extent_dup_last(req, op_idx, cur_offset - offset); doutc(cl, "got pages at %llu~%llu\n", offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); osd_req_op_extent_update(req, op_idx, len); len = 0; offset = cur_offset; data_pages = pages + i; op_idx++; } set_page_writeback(page); if (caching) ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); if (ceph_wbc.size_stable) { len = min(len, ceph_wbc.i_size - offset); } else if (i == locked_pages) { /* writepages_finish() clears writeback pages * according to the data length, so make sure * data length covers all locked pages */ u64 min_len = len + 1 - thp_size(page); len = get_writepages_data_length(inode, pages[i - 1], offset); len = max(len, min_len); } if (IS_ENCRYPTED(inode)) len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE); doutc(cl, "got pages at %llu~%llu\n", offset, len); if (IS_ENCRYPTED(inode) && ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK)) pr_warn_client(cl, "bad encrypted write offset=%lld len=%llu\n", offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); osd_req_op_extent_update(req, op_idx, len); BUG_ON(op_idx + 1 != req->r_num_ops); from_pool = false; if (i < locked_pages) { BUG_ON(num_ops <= req->r_num_ops); num_ops -= req->r_num_ops; locked_pages -= i; /* allocate new pages array for next request */ data_pages = pages; pages = kmalloc_array(locked_pages, sizeof(*pages), GFP_NOFS); if (!pages) { from_pool = true; pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } memcpy(pages, data_pages + i, locked_pages * sizeof(*pages)); memset(data_pages + i, 0, locked_pages * sizeof(*pages)); } else { BUG_ON(num_ops != req->r_num_ops); index = pages[i - 1]->index + 1; /* request message now owns the pages array */ pages = NULL; } req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(&fsc->client->osdc, req); req = NULL; wbc->nr_to_write -= i; if (pages) goto new_request; /* * We stop writing back only if we are not doing * integrity sync. In case of integrity sync we have to * keep going until we have written all the pages * we tagged for writeback prior to entering this loop. */ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) done = true; release_folios: doutc(cl, "folio_batch release on %d folios (%p)\n", (int)fbatch.nr, fbatch.nr ? 
fbatch.folios[0] : NULL); folio_batch_release(&fbatch); } if (should_loop && !done) { /* more to do; loop back to beginning of file */ doutc(cl, "looping back to beginning of file\n"); end = start_index - 1; /* OK even when start_index == 0 */ /* to write dirty pages associated with next snapc, * we need to wait until current writes complete */ if (wbc->sync_mode != WB_SYNC_NONE && start_index == 0 && /* all dirty pages were checked */ !ceph_wbc.head_snapc) { struct page *page; unsigned i, nr; index = 0; while ((index <= end) && (nr = filemap_get_folios_tag(mapping, &index, (pgoff_t)-1, PAGECACHE_TAG_WRITEBACK, &fbatch))) { for (i = 0; i < nr; i++) { page = &fbatch.folios[i]->page; if (page_snap_context(page) != snapc) continue; wait_on_page_writeback(page); } folio_batch_release(&fbatch); cond_resched(); } } start_index = 0; index = 0; goto retry; } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; out: ceph_osdc_put_request(req); ceph_put_snap_context(last_snapc); doutc(cl, "%llx.%llx dend - startone, rc = %d\n", ceph_vinop(inode), rc); return rc; } /* * See if a given @snapc is either writeable, or already written. */ static int context_is_writeable_or_written(struct inode *inode, struct ceph_snap_context *snapc) { struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, NULL); int ret = !oldest || snapc->seq <= oldest->seq; ceph_put_snap_context(oldest); return ret; } /** * ceph_find_incompatible - find an incompatible context and return it * @page: page being dirtied * * We are only allowed to write into/dirty a page if the page is * clean, or already dirty within the same snap context. Returns a * conflicting context if there is one, NULL if there isn't, or a * negative error code on other errors. * * Must be called with page lock held. */ static struct ceph_snap_context * ceph_find_incompatible(struct page *page) { struct inode *inode = page->mapping->host; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (ceph_inode_is_shutdown(inode)) { doutc(cl, " %llx.%llx page %p is shutdown\n", ceph_vinop(inode), page); return ERR_PTR(-ESTALE); } for (;;) { struct ceph_snap_context *snapc, *oldest; wait_on_page_writeback(page); snapc = page_snap_context(page); if (!snapc || snapc == ci->i_head_snapc) break; /* * this page is already dirty in another (older) snap * context! is it writeable now? 
*/ oldest = get_oldest_context(inode, NULL, NULL); if (snapc->seq > oldest->seq) { /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n", ceph_vinop(inode), page, snapc); return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n", ceph_vinop(inode), page, snapc); if (clear_page_dirty_for_io(page)) { int r = writepage_nounlock(page, NULL); if (r < 0) return ERR_PTR(r); } } return NULL; } static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, struct folio **foliop, void **_fsdata) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; snapc = ceph_find_incompatible(folio_page(*foliop, 0)); if (snapc) { int r; folio_unlock(*foliop); folio_put(*foliop); *foliop = NULL; if (IS_ERR(snapc)) return PTR_ERR(snapc); ceph_queue_writeback(inode); r = wait_event_killable(ci->i_cap_wq, context_is_writeable_or_written(inode, snapc)); ceph_put_snap_context(snapc); return r == 0 ? -EAGAIN : r; } return 0; } /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. */ static int ceph_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int r; r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, foliop, NULL); if (r < 0) return r; folio_wait_private_2(*foliop); /* [DEPRECATED] */ WARN_ON_ONCE(!folio_test_locked(*foliop)); return 0; } /* * we don't do anything in here that simple_write_end doesn't do * except adjust dirty page accounting */ static int ceph_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { struct inode *inode = file_inode(file); struct ceph_client *cl = ceph_inode_to_client(inode); bool check_cap = false; doutc(cl, "%llx.%llx file %p folio %p %d~%d (%d)\n", ceph_vinop(inode), file, folio, (int)pos, (int)copied, (int)len); if (!folio_test_uptodate(folio)) { /* just return that nothing was copied on a short copy */ if (copied < len) { copied = 0; goto out; } folio_mark_uptodate(folio); } /* did file size increase? 
*/ if (pos+copied > i_size_read(inode)) check_cap = ceph_inode_set_size(inode, pos+copied); folio_mark_dirty(folio); out: folio_unlock(folio); folio_put(folio); if (check_cap) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY); return copied; } const struct address_space_operations ceph_aops = { .read_folio = netfs_read_folio, .readahead = netfs_readahead, .writepage = ceph_writepage, .writepages = ceph_writepages_start, .write_begin = ceph_write_begin, .write_end = ceph_write_end, .dirty_folio = ceph_dirty_folio, .invalidate_folio = ceph_invalidate_folio, .release_folio = netfs_release_folio, .direct_IO = noop_direct_IO, }; static void ceph_block_sigs(sigset_t *oldset) { sigset_t mask; siginitsetinv(&mask, sigmask(SIGKILL)); sigprocmask(SIG_BLOCK, &mask, oldset); } static void ceph_restore_sigs(sigset_t *oldset) { sigprocmask(SIG_SETMASK, oldset, NULL); } /* * vm ops */ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi = vma->vm_file->private_data; loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; if (ceph_inode_is_shutdown(inode)) return ret; ceph_block_sigs(&oldset); doutc(cl, "%llx.%llx %llu trying to get caps\n", ceph_vinop(inode), off); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else want = CEPH_CAP_FILE_CACHE; got = 0; err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, &got); if (err < 0) goto out_restore; doutc(cl, "%llx.%llx %llu got cap refs on %s\n", ceph_vinop(inode), off, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || !ceph_has_inline_data(ci)) { CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); doutc(cl, "%llx.%llx %llu drop cap refs %s ret %x\n", ceph_vinop(inode), off, ceph_cap_string(got), ret); } else err = -EAGAIN; ceph_put_cap_refs(ci, got); if (err != -EAGAIN) goto out_restore; /* read inline data */ if (off >= PAGE_SIZE) { /* does not support inline data > PAGE_SIZE */ ret = VM_FAULT_SIGBUS; } else { struct address_space *mapping = inode->i_mapping; struct page *page; filemap_invalidate_lock_shared(mapping); page = find_or_create_page(mapping, 0, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) { ret = VM_FAULT_OOM; goto out_inline; } err = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, true); if (err < 0 || off >= i_size_read(inode)) { unlock_page(page); put_page(page); ret = vmf_error(err); goto out_inline; } if (err < PAGE_SIZE) zero_user_segment(page, err, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); vmf->page = page; ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: filemap_invalidate_unlock_shared(mapping); doutc(cl, "%llx.%llx %llu read inline data ret %x\n", ceph_vinop(inode), off, ret); } out_restore: ceph_restore_sigs(&oldset); if (err < 0) ret = vmf_error(err); return ret; } static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct ceph_cap_flush *prealloc_cf; struct page *page = vmf->page; loff_t off = 
page_offset(page); loff_t size = i_size_read(inode); size_t len; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; if (ceph_inode_is_shutdown(inode)) return ret; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) return VM_FAULT_OOM; sb_start_pagefault(inode->i_sb); ceph_block_sigs(&oldset); if (off + thp_size(page) <= size) len = thp_size(page); else len = offset_in_thp(page, size); doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n", ceph_vinop(inode), off, len, size); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; else want = CEPH_CAP_FILE_BUFFER; got = 0; err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, &got); if (err < 0) goto out_free; doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode), off, len, ceph_cap_string(got)); /* Update time before taking page lock */ file_update_time(vma->vm_file); inode_inc_iversion_raw(inode); do { struct ceph_snap_context *snapc; lock_page(page); if (page_mkwrite_check_truncate(page, inode) < 0) { unlock_page(page); ret = VM_FAULT_NOPAGE; break; } snapc = ceph_find_incompatible(page); if (!snapc) { /* success. we'll keep the page locked. */ set_page_dirty(page); ret = VM_FAULT_LOCKED; break; } unlock_page(page); if (IS_ERR(snapc)) { ret = VM_FAULT_SIGBUS; break; } ceph_queue_writeback(inode); err = wait_event_killable(ci->i_cap_wq, context_is_writeable_or_written(inode, snapc)); ceph_put_snap_context(snapc); } while (err == 0); if (ret == VM_FAULT_LOCKED) { int dirty; spin_lock(&ci->i_ceph_lock); dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); } doutc(cl, "%llx.%llx %llu~%zd dropping cap refs on %s ret %x\n", ceph_vinop(inode), off, len, ceph_cap_string(got), ret); ceph_put_cap_refs_async(ci, got); out_free: ceph_restore_sigs(&oldset); sb_end_pagefault(inode->i_sb); ceph_free_cap_flush(prealloc_cf); if (err < 0) ret = vmf_error(err); return ret; } void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len) { struct ceph_client *cl = ceph_inode_to_client(inode); struct address_space *mapping = inode->i_mapping; struct page *page; if (locked_page) { page = locked_page; } else { if (i_size_read(inode) == 0) return; page = find_or_create_page(mapping, 0, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return; if (PageUptodate(page)) { unlock_page(page); put_page(page); return; } } doutc(cl, "%p %llx.%llx len %zu locked_page %p\n", inode, ceph_vinop(inode), len, locked_page); if (len > 0) { void *kaddr = kmap_atomic(page); memcpy(kaddr, data, len); kunmap_atomic(kaddr); } if (page != locked_page) { if (len < PAGE_SIZE) zero_user_segment(page, len, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); put_page(page); } } int ceph_uninline_data(struct file *file) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; u64 inline_version = CEPH_INLINE_NONE; struct page *pages[1]; int err = 0; u64 len; spin_lock(&ci->i_ceph_lock); inline_version = ci->i_inline_version; spin_unlock(&ci->i_ceph_lock); doutc(cl, "%llx.%llx inline_version %llu\n", ceph_vinop(inode), inline_version); if (ceph_inode_is_shutdown(inode)) { err = -EIO; goto out; } if 
(inline_version == CEPH_INLINE_NONE) return 0; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) return -ENOMEM; if (inline_version == 1) /* initial version, no data */ goto out_uninline; folio = read_mapping_folio(inode->i_mapping, 0, file); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto out; } folio_lock(folio); len = i_size_read(inode); if (len > folio_size(folio)) len = folio_size(folio); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE, NULL, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out_unlock; } req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(&fsc->client->osdc, req); err = ceph_osdc_wait_request(&fsc->client->osdc, req); ceph_osdc_put_request(req); if (err < 0) goto out_unlock; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out_unlock; } pages[0] = folio_page(folio, 0); osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false); { __le64 xattr_buf = cpu_to_le64(inline_version); err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, "inline_version", &xattr_buf, sizeof(xattr_buf), CEPH_OSD_CMPXATTR_OP_GT, CEPH_OSD_CMPXATTR_MODE_U64); if (err) goto out_put_req; } { char xattr_buf[32]; int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf), "%llu", inline_version); err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, "inline_version", xattr_buf, xattr_len, 0, 0); if (err) goto out_put_req; } req->r_mtime = inode_get_mtime(inode); ceph_osdc_start_request(&fsc->client->osdc, req); err = ceph_osdc_wait_request(&fsc->client->osdc, req); ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, err); out_uninline: if (!err) { int dirty; /* Set to CAP_INLINE_NONE and dirty the caps */ down_read(&fsc->mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); up_read(&fsc->mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); } out_put_req: ceph_osdc_put_request(req); if (err == -ECANCELED) err = 0; out_unlock: if (folio) { folio_unlock(folio); folio_put(folio); } out: ceph_free_cap_flush(prealloc_cf); doutc(cl, "%llx.%llx inline_version %llu = %d\n", ceph_vinop(inode), inline_version, err); return err; } static const struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, }; int ceph_mmap(struct file *file, struct vm_area_struct *vma) { struct address_space *mapping = file->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; vma->vm_ops = &ceph_vmops; return 0; } enum { POOL_READ = 1, POOL_WRITE = 2, }; static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool, struct ceph_string *pool_ns) { struct ceph_fs_client *fsc = ceph_inode_to_fs_client(&ci->netfs.inode); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_client *cl = fsc->client; struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; struct rb_node **p, *parent; struct ceph_pool_perm *perm; struct page **pages; size_t pool_ns_len; int err = 0, err2 = 0, have = 0; down_read(&mdsc->pool_perm_rwsem); p = &mdsc->pool_perm_tree.rb_node; while (*p) { perm = rb_entry(*p, struct ceph_pool_perm, node); if (pool < perm->pool) p = &(*p)->rb_left; else if (pool > perm->pool) 
p = &(*p)->rb_right; else { int ret = ceph_compare_string(pool_ns, perm->pool_ns, perm->pool_ns_len); if (ret < 0) p = &(*p)->rb_left; else if (ret > 0) p = &(*p)->rb_right; else { have = perm->perm; break; } } } up_read(&mdsc->pool_perm_rwsem); if (*p) goto out; if (pool_ns) doutc(cl, "pool %lld ns %.*s no perm cached\n", pool, (int)pool_ns->len, pool_ns->str); else doutc(cl, "pool %lld no perm cached\n", pool); down_write(&mdsc->pool_perm_rwsem); p = &mdsc->pool_perm_tree.rb_node; parent = NULL; while (*p) { parent = *p; perm = rb_entry(parent, struct ceph_pool_perm, node); if (pool < perm->pool) p = &(*p)->rb_left; else if (pool > perm->pool) p = &(*p)->rb_right; else { int ret = ceph_compare_string(pool_ns, perm->pool_ns, perm->pool_ns_len); if (ret < 0) p = &(*p)->rb_left; else if (ret > 0) p = &(*p)->rb_right; else { have = perm->perm; break; } } } if (*p) { up_write(&mdsc->pool_perm_rwsem); goto out; } rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL, 1, false, GFP_NOFS); if (!rd_req) { err = -ENOMEM; goto out_unlock; } rd_req->r_flags = CEPH_OSD_FLAG_READ; osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); rd_req->r_base_oloc.pool = pool; if (pool_ns) rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns); ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino); err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS); if (err) goto out_unlock; wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL, 1, false, GFP_NOFS); if (!wr_req) { err = -ENOMEM; goto out_unlock; } wr_req->r_flags = CEPH_OSD_FLAG_WRITE; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS); if (err) goto out_unlock; /* one page should be large enough for STAT data */ pages = ceph_alloc_page_vector(1, GFP_KERNEL); if (IS_ERR(pages)) { err = PTR_ERR(pages); goto out_unlock; } osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE, 0, false, true); ceph_osdc_start_request(&fsc->client->osdc, rd_req); wr_req->r_mtime = inode_get_mtime(&ci->netfs.inode); ceph_osdc_start_request(&fsc->client->osdc, wr_req); err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req); err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req); if (err >= 0 || err == -ENOENT) have |= POOL_READ; else if (err != -EPERM) { if (err == -EBLOCKLISTED) fsc->blocklisted = true; goto out_unlock; } if (err2 == 0 || err2 == -EEXIST) have |= POOL_WRITE; else if (err2 != -EPERM) { if (err2 == -EBLOCKLISTED) fsc->blocklisted = true; err = err2; goto out_unlock; } pool_ns_len = pool_ns ? 
pool_ns->len : 0; perm = kmalloc(struct_size(perm, pool_ns, pool_ns_len + 1), GFP_NOFS); if (!perm) { err = -ENOMEM; goto out_unlock; } perm->pool = pool; perm->perm = have; perm->pool_ns_len = pool_ns_len; if (pool_ns_len > 0) memcpy(perm->pool_ns, pool_ns->str, pool_ns_len); perm->pool_ns[pool_ns_len] = 0; rb_link_node(&perm->node, parent, p); rb_insert_color(&perm->node, &mdsc->pool_perm_tree); err = 0; out_unlock: up_write(&mdsc->pool_perm_rwsem); ceph_osdc_put_request(rd_req); ceph_osdc_put_request(wr_req); out: if (!err) err = have; if (pool_ns) doutc(cl, "pool %lld ns %.*s result = %d\n", pool, (int)pool_ns->len, pool_ns->str, err); else doutc(cl, "pool %lld result = %d\n", pool, err); return err; } int ceph_pool_perm_check(struct inode *inode, int need) { struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_string *pool_ns; s64 pool; int ret, flags; /* Only need to do this for regular files */ if (!S_ISREG(inode->i_mode)) return 0; if (ci->i_vino.snap != CEPH_NOSNAP) { /* * Pool permission check needs to write to the first object. * But for snapshot, head of the first object may have already * been deleted. Skip check to avoid creating orphan object. */ return 0; } if (ceph_test_mount_opt(ceph_inode_to_fs_client(inode), NOPOOLPERM)) return 0; spin_lock(&ci->i_ceph_lock); flags = ci->i_ceph_flags; pool = ci->i_layout.pool_id; spin_unlock(&ci->i_ceph_lock); check: if (flags & CEPH_I_POOL_PERM) { if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { doutc(cl, "pool %lld no read perm\n", pool); return -EPERM; } if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { doutc(cl, "pool %lld no write perm\n", pool); return -EPERM; } return 0; } pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); ret = __ceph_pool_perm_get(ci, pool, pool_ns); ceph_put_string(pool_ns); if (ret < 0) return ret; flags = CEPH_I_POOL_PERM; if (ret & POOL_READ) flags |= CEPH_I_POOL_RD; if (ret & POOL_WRITE) flags |= CEPH_I_POOL_WR; spin_lock(&ci->i_ceph_lock); if (pool == ci->i_layout.pool_id && pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) { ci->i_ceph_flags |= flags; } else { pool = ci->i_layout.pool_id; flags = ci->i_ceph_flags; } spin_unlock(&ci->i_ceph_lock); goto check; } void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc) { struct ceph_pool_perm *perm; struct rb_node *n; while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) { n = rb_first(&mdsc->pool_perm_tree); perm = rb_entry(n, struct ceph_pool_perm, node); rb_erase(n, &mdsc->pool_perm_tree); kfree(perm); } }
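The pool permission probe above is cached in an rbtree keyed by pool id and namespace, with a read-locked lookup retried under the write lock before the OSD round trip. Below is a minimal user-space sketch of that double-checked cache pattern, assuming a pthread rwlock and a flat array; cache, perm_probe() and pool_perm_get() are illustrative names, not kernel API.

/*
 * Double-checked permission cache, modelled on __ceph_pool_perm_get():
 * look up under the read lock, and only on a miss re-check under the
 * write lock before doing the expensive probe and inserting the result.
 */
#include <pthread.h>
#include <stdio.h>

#define POOL_READ  1
#define POOL_WRITE 2

struct pool_perm { long long pool; int perm; };

static struct pool_perm cache[64];	/* stand-in for the rbtree */
static int cache_len;
static pthread_rwlock_t cache_lock = PTHREAD_RWLOCK_INITIALIZER;

static int cache_find(long long pool)
{
	for (int i = 0; i < cache_len; i++)
		if (cache[i].pool == pool)
			return cache[i].perm;
	return -1;
}

/* stand-in for the OSD STAT/CREATE probe */
static int perm_probe(long long pool)
{
	return POOL_READ | POOL_WRITE;
}

static int pool_perm_get(long long pool)
{
	int perm;

	pthread_rwlock_rdlock(&cache_lock);
	perm = cache_find(pool);
	pthread_rwlock_unlock(&cache_lock);
	if (perm >= 0)
		return perm;

	pthread_rwlock_wrlock(&cache_lock);
	perm = cache_find(pool);	/* another thread may have won the race */
	if (perm < 0) {
		perm = perm_probe(pool);
		if (cache_len < 64) {	/* fixed-size cache for the sketch */
			cache[cache_len].pool = pool;
			cache[cache_len].perm = perm;
			cache_len++;
		}
	}
	pthread_rwlock_unlock(&cache_lock);
	return perm;
}

int main(void)
{
	printf("perm for pool 7 = %#x\n", pool_perm_get(7));
	return 0;
}

Compile with -pthread; the real code additionally keys on the pool namespace string and derives the POOL_READ/POOL_WRITE bits from the results of the two OSD requests.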
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> struct nft_range_expr { struct nft_data data_from; struct nft_data data_to; u8 sreg; u8 len; enum nft_range_ops op:8; }; void nft_range_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); int d1, d2; d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len); d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len); switch (priv->op) { case NFT_RANGE_EQ: if (d1 < 0 || d2 > 0) regs->verdict.code = NFT_BREAK; break; case NFT_RANGE_NEQ: if (d1 >= 0 && d2 <= 0) regs->verdict.code = NFT_BREAK; break; } } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { [NFTA_RANGE_SREG] = { .type = NLA_U32 }, [NFTA_RANGE_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, }; static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_range_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc_from = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data_from), }; struct nft_data_desc desc_to = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data_to), }; int err; u32 op; if (!tb[NFTA_RANGE_SREG] || !tb[NFTA_RANGE_OP] || !tb[NFTA_RANGE_FROM_DATA] || !tb[NFTA_RANGE_TO_DATA]) return -EINVAL; err = nft_data_init(NULL, &priv->data_from, &desc_from, tb[NFTA_RANGE_FROM_DATA]); if (err < 0) return err; err = nft_data_init(NULL, &priv->data_to, &desc_to, tb[NFTA_RANGE_TO_DATA]); if (err < 0) goto err1; if (desc_from.len != desc_to.len) { err = -EINVAL; goto err2; } err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg, desc_from.len); if (err < 0) goto err2; err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op); if (err < 0) goto err2; switch (op) { case NFT_RANGE_EQ: case NFT_RANGE_NEQ: break; default: err = -EINVAL; goto err2; } priv->op = op; priv->len = desc_from.len; return 0; err2: nft_data_release(&priv->data_to, desc_to.type); err1: nft_data_release(&priv->data_from, desc_from.type); return err; } static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_range_expr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) goto nla_put_failure; if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, NFT_DATA_VALUE, priv->len) < 0 || nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, NFT_DATA_VALUE, priv->len) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct
nft_expr_ops nft_range_ops = { .type = &nft_range_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), .eval = nft_range_eval, .init = nft_range_init, .dump = nft_range_dump, .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_range_type __read_mostly = { .name = "range", .ops = &nft_range_ops, .policy = nft_range_policy, .maxattr = NFTA_RANGE_MAX, .owner = THIS_MODULE, };
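nft_range_eval() reduces the range test to two memcmp() calls against the "from" and "to" boundaries, which corresponds to numeric ordering when the operands are in big-endian (network) order, as nftables range operands normally are. A small stand-alone sketch of that check follows; in_range() is an illustrative helper, not kernel API.

/*
 * Byte-wise range check in the spirit of nft_range_eval(): for NFT_RANGE_EQ
 * the rule keeps running only when from <= reg <= to, otherwise evaluation
 * breaks; NFT_RANGE_NEQ inverts the test.
 */
#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool in_range(const void *reg, const void *from, const void *to,
		     size_t len)
{
	return memcmp(reg, from, len) >= 0 && memcmp(reg, to, len) <= 0;
}

int main(void)
{
	/* store the values big-endian so memcmp() ordering is numeric */
	uint32_t port = htonl(443), lo = htonl(1), hi = htonl(1024);

	printf("443 in [1, 1024]: %s\n",
	       in_range(&port, &lo, &hi, sizeof(port)) ? "yes" : "no");
	return 0;
}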
// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl> * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org> * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de> */ #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/crc16.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/major.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/jiffies.h> #include <linux/refcount.h> #include <net/ax25.h> #define AX_MTU 236 /* some arch define END as assembly function ending, just undef it */ #undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ #define ESC_END 0334 /* ESC ESC_END means END 'data' */ #define ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ struct mkiss { struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ /* These are pointers to the malloc()ed frame buffers. */ spinlock_t buflock;/* lock for rbuf and xbuf */ unsigned char *rbuff; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char *xbuff; /* transmitter buffer */ unsigned char *xhead; /* pointer to next byte to XMIT */ int xleft; /* bytes left in XMIT queue */ /* Detailed SLIP statistics. */ int mtu; /* Our mtu (to spot changes!) */ int buffsize; /* Max buffers sizes */ unsigned long flags; /* Flag values/ mode etc */ /* long req'd: used by set_bit --RR */ #define AXF_INUSE 0 /* Channel in use */ #define AXF_ESCAPE 1 /* ESC received */ #define AXF_ERROR 2 /* Parity, etc. error */ #define AXF_KEEPTEST 3 /* Keepalive test flag */ #define AXF_OUTWAIT 4 /* is outpacket was flag */ int mode; int crcmode; /* MW: for FlexNet, SMACK etc.
*/ int crcauto; /* CRC auto mode */ #define CRC_MODE_NONE 0 #define CRC_MODE_FLEX 1 #define CRC_MODE_SMACK 2 #define CRC_MODE_FLEX_TEST 3 #define CRC_MODE_SMACK_TEST 4 refcount_t refcnt; struct completion dead; }; /*---------------------------------------------------------------------------*/ static const unsigned short crc_flex_table[] = { 0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38, 0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770, 0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9, 0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1, 0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a, 0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672, 0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb, 0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3, 0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c, 0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574, 0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd, 0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5, 0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e, 0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476, 0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf, 0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7, 0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30, 0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378, 0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1, 0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9, 0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32, 0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a, 0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3, 0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb, 0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34, 0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c, 0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5, 0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd, 0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36, 0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e, 0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7, 0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff }; static unsigned short calc_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; return crc; } static int check_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; if (size < 3) return -1; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; if ((crc & 0xffff) != 0x7070) return -1; return 0; } static int check_crc_16(unsigned char *cp, int size) { unsigned short crc = 0x0000; if (size < 3) return -1; crc = crc16(0, cp, size); if (crc != 0x0000) return -1; return 0; } /* * Standard encapsulation */ static int kiss_esc(unsigned char *s, unsigned char *d, int len) { unsigned char *ptr = d; unsigned char c; /* * Send an initial END character to flush out any data that may have * accumulated in the receiver due to line noise. 
*/ *ptr++ = END; while (len-- > 0) { switch (c = *s++) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* * MW: * OK its ugly, but tell me a better solution without copying the * packet to a temporary buffer :-) */ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, int len) { unsigned char *ptr = d; unsigned char c=0; *ptr++ = END; while (len > 0) { if (len > 2) c = *s++; else if (len > 1) c = crc >> 8; else c = crc & 0xff; len--; switch (c) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* Send one completely decapsulated AX.25 packet to the AX.25 layer. */ static void ax_bump(struct mkiss *ax) { struct sk_buff *skb; int count; spin_lock_bh(&ax->buflock); if (ax->rbuff[0] > 0x0f) { if (ax->rbuff[0] & 0x80) { if (check_crc_16(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-smack\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK; } ax->rcount -= 2; *ax->rbuff &= ~0x80; } else if (ax->rbuff[0] & 0x20) { if (check_crc_flex(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-flexnet\n", ax->dev->name); ax->crcmode = CRC_MODE_FLEX; } ax->rcount -= 2; /* * dl9sau bugfix: the trailling two bytes flexnet crc * will not be passed to the kernel. thus we have to * correct the kissparm signature, because it indicates * a crc but there's none */ *ax->rbuff &= ~0x20; } } count = ax->rcount; if ((skb = dev_alloc_skb(count)) == NULL) { printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", ax->dev->name); ax->dev->stats.rx_dropped++; spin_unlock_bh(&ax->buflock); return; } skb_put_data(skb, ax->rbuff, count); skb->protocol = ax25_type_trans(skb, ax->dev); netif_rx(skb); ax->dev->stats.rx_packets++; ax->dev->stats.rx_bytes += count; spin_unlock_bh(&ax->buflock); } static void kiss_unesc(struct mkiss *ax, unsigned char s) { switch (s) { case END: /* drop keeptest bit = VSV */ if (test_bit(AXF_KEEPTEST, &ax->flags)) clear_bit(AXF_KEEPTEST, &ax->flags); if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2)) ax_bump(ax); clear_bit(AXF_ESCAPE, &ax->flags); ax->rcount = 0; return; case ESC: set_bit(AXF_ESCAPE, &ax->flags); return; case ESC_ESC: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = ESC; break; case ESC_END: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = END; break; } spin_lock_bh(&ax->buflock); if (!test_bit(AXF_ERROR, &ax->flags)) { if (ax->rcount < ax->buffsize) { ax->rbuff[ax->rcount++] = s; spin_unlock_bh(&ax->buflock); return; } ax->dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } spin_unlock_bh(&ax->buflock); } static int ax_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr_ax25 *sa = addr; netif_tx_lock_bh(dev); netif_addr_lock(dev); __dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); return 0; } /*---------------------------------------------------------------------------*/ static void ax_changedmtu(struct mkiss *ax) { struct net_device *dev = ax->dev; unsigned char *xbuff, *rbuff, *oxbuff, *orbuff; int len; len = dev->mtu * 2; /* * allow 
for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); if (xbuff == NULL || rbuff == NULL) { printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, " "MTU change cancelled.\n", ax->dev->name); dev->mtu = ax->mtu; kfree(xbuff); kfree(rbuff); return; } spin_lock_bh(&ax->buflock); oxbuff = ax->xbuff; ax->xbuff = xbuff; orbuff = ax->rbuff; ax->rbuff = rbuff; if (ax->xleft) { if (ax->xleft <= len) { memcpy(ax->xbuff, ax->xhead, ax->xleft); } else { ax->xleft = 0; dev->stats.tx_dropped++; } } ax->xhead = ax->xbuff; if (ax->rcount) { if (ax->rcount <= len) { memcpy(ax->rbuff, orbuff, ax->rcount); } else { ax->rcount = 0; dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } } ax->mtu = dev->mtu + 73; ax->buffsize = len; spin_unlock_bh(&ax->buflock); kfree(oxbuff); kfree(orbuff); } /* Encapsulate one AX.25 packet and stuff into a TTY queue. */ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) { struct mkiss *ax = netdev_priv(dev); unsigned char *p; int actual, count; if (ax->mtu != ax->dev->mtu + 73) /* Someone has been ifconfigging */ ax_changedmtu(ax); if (len > ax->mtu) { /* Sigh, shouldn't occur BUT ... */ printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name); dev->stats.tx_dropped++; netif_start_queue(dev); return; } p = icp; spin_lock_bh(&ax->buflock); if ((*p & 0x0f) != 0) { /* Configuration Command (kissparms(1). * Protocol spec says: never append CRC. * This fixes a very old bug in the linux * kiss driver. -- dl9sau */ switch (*p & 0xff) { case 0x85: /* command from userspace especially for us, * not for delivery to the tnc */ if (len > 1) { int cmd = (p[1] & 0xff); switch(cmd) { case 3: ax->crcmode = CRC_MODE_SMACK; break; case 2: ax->crcmode = CRC_MODE_FLEX; break; case 1: ax->crcmode = CRC_MODE_NONE; break; case 0: default: ax->crcmode = CRC_MODE_SMACK_TEST; cmd = 0; } ax->crcauto = (cmd ? 0 : 1); printk(KERN_INFO "mkiss: %s: crc mode set to %d\n", ax->dev->name, cmd); } spin_unlock_bh(&ax->buflock); netif_start_queue(dev); return; default: count = kiss_esc(p, ax->xbuff, len); } } else { unsigned short crc; switch (ax->crcmode) { case CRC_MODE_SMACK_TEST: ax->crcmode = CRC_MODE_FLEX_TEST; printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name); fallthrough; case CRC_MODE_SMACK: *p |= 0x80; crc = swab16(crc16(0, p, len)); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; case CRC_MODE_FLEX_TEST: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name); fallthrough; case CRC_MODE_FLEX: *p |= 0x20; crc = calc_crc_flex(p, len); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; default: count = kiss_esc(p, ax->xbuff, len); } } spin_unlock_bh(&ax->buflock); set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); actual = ax->tty->ops->write(ax->tty, ax->xbuff, count); dev->stats.tx_packets++; dev->stats.tx_bytes += actual; netif_trans_update(ax->dev); ax->xleft = count - actual; ax->xhead = ax->xbuff + actual; } /* Encapsulate an AX.25 packet and kick it into a TTY queue. 
*/ static netdev_tx_t ax_xmit(struct sk_buff *skb, struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (skb->protocol == htons(ETH_P_IP)) return ax25_ip_xmit(skb); if (!netif_running(dev)) { printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name); return NETDEV_TX_BUSY; } if (netif_queue_stopped(dev)) { /* * May be we must check transmitter timeout here ? * 14 Oct 1994 Dmitry Gorodchanin. */ if (time_before(jiffies, dev_trans_start(dev) + 20 * HZ)) { /* 20 sec timeout not reached */ return NETDEV_TX_BUSY; } printk(KERN_ERR "mkiss: %s: transmit timed out, %s?\n", dev->name, (tty_chars_in_buffer(ax->tty) || ax->xleft) ? "bad line quality" : "driver error"); ax->xleft = 0; clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_start_queue(dev); } /* We were not busy, so we are now... :-) */ netif_stop_queue(dev); ax_encaps(dev, skb->data, skb->len); kfree_skb(skb); return NETDEV_TX_OK; } static int ax_open_dev(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty == NULL) return -ENODEV; return 0; } /* Open the low-level part of the AX25 channel. Easy! */ static int ax_open(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); unsigned long len; if (ax->tty == NULL) return -ENODEV; /* * Allocate the frame buffers: * * rbuff Receive buffer. * xbuff Transmit buffer. */ len = dev->mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; if ((ax->rbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto norbuff; if ((ax->xbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto noxbuff; ax->mtu = dev->mtu + 73; ax->buffsize = len; ax->rcount = 0; ax->xleft = 0; ax->flags &= (1 << AXF_INUSE); /* Clear ESCAPE & ERROR flags */ spin_lock_init(&ax->buflock); return 0; noxbuff: kfree(ax->rbuff); norbuff: return -ENOMEM; } /* Close the low-level part of the AX25 channel. Easy! */ static int ax_close(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty) clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_stop_queue(dev); return 0; } static const struct net_device_ops ax_netdev_ops = { .ndo_open = ax_open_dev, .ndo_stop = ax_close, .ndo_start_xmit = ax_xmit, .ndo_set_mac_address = ax_set_mac_address, }; static void ax_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->mtu = AX_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_AX25; dev->tx_queue_len = 10; dev->header_ops = &ax25_header_ops; dev->netdev_ops = &ax_netdev_ops; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->flags = IFF_BROADCAST | IFF_MULTICAST; } /* * We have a potential race on dereferencing tty->disc_data, because the tty * layer provides no locking at all - thus one cpu could be running * sixpack_receive_buf while another calls sixpack_close, which zeroes * tty->disc_data and frees the memory that sixpack_receive_buf is using. The * best way to fix this is to use a rwlock in the tty struct, but for now we * use a single global rwlock for all ttys in ppp line discipline. 
*/ static DEFINE_RWLOCK(disc_data_lock); static struct mkiss *mkiss_get(struct tty_struct *tty) { struct mkiss *ax; read_lock(&disc_data_lock); ax = tty->disc_data; if (ax) refcount_inc(&ax->refcnt); read_unlock(&disc_data_lock); return ax; } static void mkiss_put(struct mkiss *ax) { if (refcount_dec_and_test(&ax->refcnt)) complete(&ax->dead); } static int crc_force = 0; /* Can be overridden with insmod */ static int mkiss_open(struct tty_struct *tty) { struct net_device *dev; struct mkiss *ax; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EOPNOTSUPP; dev = alloc_netdev(sizeof(struct mkiss), "ax%d", NET_NAME_UNKNOWN, ax_setup); if (!dev) { err = -ENOMEM; goto out; } ax = netdev_priv(dev); ax->dev = dev; spin_lock_init(&ax->buflock); refcount_set(&ax->refcnt, 1); init_completion(&ax->dead); ax->tty = tty; tty->disc_data = ax; tty->receive_room = 65535; tty_driver_flush_buffer(tty); /* Restore default settings */ dev->type = ARPHRD_AX25; /* Perform the low-level AX25 initialization. */ err = ax_open(ax->dev); if (err) goto out_free_netdev; err = register_netdev(dev); if (err) goto out_free_buffers; /* after register_netdev() - because else printk smashes the kernel */ switch (crc_force) { case 3: ax->crcmode = CRC_MODE_SMACK; printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n", ax->dev->name); break; case 2: ax->crcmode = CRC_MODE_FLEX; printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n", ax->dev->name); break; case 1: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: crc mode disabled.\n", ax->dev->name); break; case 0: default: crc_force = 0; printk(KERN_INFO "mkiss: %s: crc mode is auto.\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK_TEST; } ax->crcauto = (crc_force ? 0 : 1); netif_start_queue(dev); /* Done. We have linked the TTY line to a channel. */ return 0; out_free_buffers: kfree(ax->rbuff); kfree(ax->xbuff); out_free_netdev: free_netdev(dev); out: return err; } static void mkiss_close(struct tty_struct *tty) { struct mkiss *ax; write_lock_irq(&disc_data_lock); ax = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ax) return; /* * We have now ensured that nobody can start using ap from now on, but * we have to wait for all existing users to finish. */ if (!refcount_dec_and_test(&ax->refcnt)) wait_for_completion(&ax->dead); /* * Halt the transmit queue so that a new transmit cannot scribble * on our buffers */ netif_stop_queue(ax->dev); unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; free_netdev(ax->dev); } /* Perform I/O control on an active ax25 channel. */ static int mkiss_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct mkiss *ax = mkiss_get(tty); struct net_device *dev; unsigned int tmp, err; /* First make sure we're connected. */ if (ax == NULL) return -ENXIO; dev = ax->dev; switch (cmd) { case SIOCGIFNAME: err = copy_to_user((void __user *) arg, ax->dev->name, strlen(ax->dev->name) + 1) ? 
-EFAULT : 0; break; case SIOCGIFENCAP: err = put_user(4, (int __user *) arg); break; case SIOCSIFENCAP: if (get_user(tmp, (int __user *) arg)) { err = -EFAULT; break; } ax->mode = tmp; dev->addr_len = AX25_ADDR_LEN; dev->hard_header_len = AX25_KISS_HEADER_LEN + AX25_MAX_HEADER_LEN + 3; dev->type = ARPHRD_AX25; err = 0; break; case SIOCSIFHWADDR: { char addr[AX25_ADDR_LEN]; if (copy_from_user(&addr, (void __user *) arg, AX25_ADDR_LEN)) { err = -EFAULT; break; } netif_tx_lock_bh(dev); __dev_addr_set(dev, addr, AX25_ADDR_LEN); netif_tx_unlock_bh(dev); err = 0; break; } default: err = -ENOIOCTLCMD; } mkiss_put(ax); return err; } /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ static void mkiss_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct mkiss *ax = mkiss_get(tty); if (!ax) return; /* * Argh! mtu change time! - costs us the packet part received * at the change */ if (ax->mtu != ax->dev->mtu + 73) ax_changedmtu(ax); /* Read the characters out of the buffer */ while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; cp++; continue; } kiss_unesc(ax, *cp++); } mkiss_put(ax); tty_unthrottle(tty); } /* * Called by the driver when there's room for more data. If we have * more packets to send, we send them here. */ static void mkiss_write_wakeup(struct tty_struct *tty) { struct mkiss *ax = mkiss_get(tty); int actual; if (!ax) return; if (ax->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); netif_wake_queue(ax->dev); goto out; } actual = tty->ops->write(tty, ax->xhead, ax->xleft); ax->xleft -= actual; ax->xhead += actual; out: mkiss_put(ax); } static struct tty_ldisc_ops ax_ldisc = { .owner = THIS_MODULE, .num = N_AX25, .name = "mkiss", .open = mkiss_open, .close = mkiss_close, .ioctl = mkiss_ioctl, .receive_buf = mkiss_receive_buf, .write_wakeup = mkiss_write_wakeup }; static const char banner[] __initconst = KERN_INFO \ "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n"; static const char msg_regfail[] __initconst = KERN_ERR \ "mkiss: can't register line discipline (err = %d)\n"; static int __init mkiss_init_driver(void) { int status; printk(banner); status = tty_register_ldisc(&ax_ldisc); if (status != 0) printk(msg_regfail, status); return status; } static void __exit mkiss_exit_driver(void) { tty_unregister_ldisc(&ax_ldisc); } MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>"); MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs"); module_param(crc_force, int, 0); MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_AX25); module_init(mkiss_init_driver); module_exit(mkiss_exit_driver);
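kiss_esc() above implements standard KISS byte stuffing: every frame is wrapped in END delimiters and any END or ESC byte in the payload is escaped as ESC ESC_END or ESC ESC_ESC. Here is a user-space sketch of the same transformation; kiss_stuff() is an illustrative name, and as in the driver the caller provides a worst-case 2*len + 2 output buffer.

/* KISS byte stuffing, mirroring the switch in kiss_esc(). */
#include <stddef.h>
#include <stdio.h>

#define END     0300	/* frame delimiter */
#define ESC     0333	/* escape character */
#define ESC_END 0334	/* ESC ESC_END stands for a literal END */
#define ESC_ESC 0335	/* ESC ESC_ESC stands for a literal ESC */

static size_t kiss_stuff(const unsigned char *s, unsigned char *d, size_t len)
{
	unsigned char *ptr = d;

	*ptr++ = END;			/* leading END flushes line noise */
	while (len--) {
		unsigned char c = *s++;

		if (c == END) {
			*ptr++ = ESC;
			*ptr++ = ESC_END;
		} else if (c == ESC) {
			*ptr++ = ESC;
			*ptr++ = ESC_ESC;
		} else {
			*ptr++ = c;
		}
	}
	*ptr++ = END;			/* trailing END closes the frame */
	return ptr - d;
}

int main(void)
{
	unsigned char in[] = { 0x00, END, 0x42, ESC };
	unsigned char out[2 * sizeof(in) + 2];
	size_t n = kiss_stuff(in, out, sizeof(in));

	for (size_t i = 0; i < n; i++)
		printf("%02x ", out[i]);
	printf("\n");
	return 0;
}

The receive side (kiss_unesc() above) is the exact inverse, with the SMACK/FlexNet CRC trailers verified and stripped before the frame is handed to the AX.25 layer.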
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_mirred.c packet mirroring and redirect actions * * Authors: Jamal Hadi Salim (2002-4) * * TODO: Add ingress support (and socket redirect support) */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> #include <linux/gfp.h> #include <linux/if_arp.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_wrapper.h> static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock);
#define MIRRED_NEST_LIMIT 4 static DEFINE_PER_CPU(unsigned int, mirred_nest_level); static bool tcf_mirred_is_act_redirect(int action) { return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: return true; default: BUG(); } } static bool tcf_mirred_can_reinsert(int action) { switch (action) { case TC_ACT_SHOT: case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: return true; } return false; } static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m) { return rcu_dereference_protected(m->tcfm_dev, lockdep_is_held(&m->tcf_lock)); } static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; spin_lock(&mirred_list_lock); list_del(&m->tcfm_list); spin_unlock(&mirred_list_lock); /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; static void tcf_mirred_replace_dev(struct tcf_mirred *m, struct net_device *ndev) { struct net_device *odev; odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); netdev_put(odev, &m->tcfm_dev_tracker); } static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; bool exists = false; int ret, err; u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); return -EINVAL; } parm = nla_data(tb[TCA_MIRRED_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Cannot specify Block ID and dev simultaneously"); if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: break; default: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (!exists) { if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, &act_mirred_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } m = to_mirred(*a); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&m->tcfm_list); err = 
tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { spin_unlock_bh(&m->tcf_lock); err = -ENODEV; goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; m->tcfm_blockid = 0; } else if (tb[TCA_MIRRED_BLOCKID]) { tcf_mirred_replace_dev(m, NULL); m->tcfm_mac_header_xmit = false; m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); } return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); return err; } static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, struct net_device *dev, const bool m_mac_header_xmit, int m_eaction, int retval) { struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; bool dont_clone; int mac_len; bool at_nh; int err; is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ at_ingress = skb_at_tc_ingress(skb); dont_clone = skb_at_tc_ingress(skb) && is_redirect && tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); if (!skb_to_send) goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { mac_len = at_ingress ? 
skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ skb_push_rcsum(skb_to_send, mac_len); } } skb_to_send->skb_iif = skb->dev->ifindex; skb_to_send->dev = dev; if (is_redirect) { if (skb == skb_to_send) retval = TC_ACT_CONSUMED; skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) tcf_action_inc_overlimit_qstats(&m->common); return retval; err_cant_do: if (is_redirect) retval = TC_ACT_SHOT; tcf_action_inc_overlimit_qstats(&m->common); return retval; } static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev_prev = NULL; struct net_device *dev = NULL; unsigned long index; int mirred_eaction; mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; if (!dev_prev) goto assign_prev; tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev), mirred_eaction, retval); assign_prev: dev_prev = dev; } if (dev_prev) return tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev_prev), m_eaction, retval); return retval; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev = NULL; unsigned long index; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; tcf_mirred_to_dev(skb, m, dev, dev_is_mac_header_xmit(dev), m_eaction, retval); } return retval; } static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; int m_eaction; m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup * directly. 
*/ block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); return retval; } if (is_redirect) return tcf_blockcast_redir(skb, m, block, m_eaction, exception_ifindex, retval); /* If it's not redirect, it is mirror */ return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, retval); } TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); unsigned int nest_level; bool m_mac_header_xmit; struct net_device *dev; int m_eaction; u32 blockid; nest_level = __this_cpu_inc_return(mirred_nest_level); if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); retval = TC_ACT_SHOT; goto dec_nest_level; } tcf_lastuse_update(&m->tcf_tm); tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); if (blockid) { retval = tcf_blockcast(skb, m, blockid, res, retval); goto dec_nest_level; } dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); goto dec_nest_level; } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_mirred *m = to_mirred(a); struct tcf_t *tm = &m->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); struct tc_mirred opt = { .index = m->tcf_index, .refcnt = refcount_read(&m->tcf_refcnt) - ref, .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, }; struct net_device *dev; struct tcf_t t; u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; opt.eaction = m->tcfm_eaction; dev = tcf_mirred_dev_dereference(m); if (dev) opt.ifindex = dev->ifindex; if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; blockid = m->tcfm_blockid; if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) goto nla_put_failure; tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; spin_unlock_bh(&m->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&m->tcf_lock); nlmsg_trim(skb, b); return -1; } static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) { spin_lock(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. 
*/ RCU_INIT_POINTER(m->tcfm_dev, NULL); } spin_unlock_bh(&m->tcf_lock); } spin_unlock(&mirred_list_lock); } return NOTIFY_DONE; } static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; static void tcf_mirred_dev_put(void *priv) { struct net_device *dev = priv; dev_put(dev); } static struct net_device * tcf_mirred_get_dev(const struct tc_action *a, tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); if (dev) { dev_hold(dev); *destructor = tcf_mirred_dev_put; } rcu_read_unlock(); return dev; } static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); } static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, const struct tc_action *act) { entry->dev = act->ops->get_dev(act, &entry->destructor); if (!entry->dev) return; entry->destructor_priv = entry->dev; } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_mirred_egress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT; else if (is_tcf_mirred_egress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED; else if (is_tcf_mirred_ingress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT_INGRESS; else if (is_tcf_mirred_ingress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED_INGRESS; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, .get_fill_size = tcf_mirred_get_fill_size, .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; MODULE_ALIAS_NET_ACT("mirred"); static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { int err = register_netdevice_notifier(&mirred_device_notifier); if (err) return err; pr_info("Mirror/redirect action on\n"); err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); if (err) 
unregister_netdevice_notifier(&mirred_device_notifier); return err; } static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops, &mirred_net_ops); unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); module_exit(mirred_cleanup_module);
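tcf_mirred_act() protects itself against redirect loops with a per-CPU nesting counter capped at MIRRED_NEST_LIMIT; once the limit is exceeded the packet is shot instead of being mirrored again. A thread-local sketch of that guard follows, where mirred_act() and its recursive "loop back" call are illustrative stand-ins for the real forwarding path.

/* Recursion guard in the spirit of tcf_mirred_act()'s mirred_nest_level. */
#include <stdio.h>

#define MIRRED_NEST_LIMIT 4

enum { ACT_OK, ACT_SHOT };

static _Thread_local unsigned int mirred_nest_level;

static int mirred_act(int redirects_left)
{
	int ret = ACT_OK;

	if (++mirred_nest_level > MIRRED_NEST_LIMIT) {
		ret = ACT_SHOT;		/* drop rather than recurse forever */
		goto out;
	}
	if (redirects_left > 0)		/* pretend the target redirects back */
		ret = mirred_act(redirects_left - 1);
out:
	mirred_nest_level--;
	return ret;
}

int main(void)
{
	printf("2 chained redirects: %s\n",
	       mirred_act(2) == ACT_OK ? "delivered" : "dropped");
	printf("10 chained redirects: %s\n",
	       mirred_act(10) == ACT_OK ? "delivered" : "dropped");
	return 0;
}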
/* * LZ4 - Fast LZ compression algorithm * Copyright (C) 2011 - 2016, Yann Collet. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * You can contact the author at : * - LZ4 homepage : http://www.lz4.org * - LZ4 source repository : https://github.com/lz4/lz4 * * Changed for kernel usage by: * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ /*-************************************ * Dependencies **************************************/ #include "lz4defs.h" #include <linux/module.h> #include <linux/kernel.h> #include <linux/unaligned.h> static const int LZ4_minLength = (MFLIMIT + 1); static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1)); /*-****************************** * Compression functions ********************************/ static FORCE_INLINE U32 LZ4_hash4( U32 sequence, tableType_t const tableType) { if (tableType == byU16) return ((sequence * 2654435761U) >> ((MINMATCH * 8) - (LZ4_HASHLOG + 1))); else return ((sequence * 2654435761U) >> ((MINMATCH * 8) - LZ4_HASHLOG)); } static FORCE_INLINE U32 LZ4_hash5( U64 sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ?
LZ4_HASHLOG + 1 : LZ4_HASHLOG; #if LZ4_LITTLE_ENDIAN static const U64 prime5bytes = 889523592379ULL; return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); #else static const U64 prime8bytes = 11400714785074694791ULL; return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); #endif } static FORCE_INLINE U32 LZ4_hashPosition( const void *p, tableType_t const tableType) { #if LZ4_ARCH64 if (tableType == byU32) return LZ4_hash5(LZ4_read_ARCH(p), tableType); #endif return LZ4_hash4(LZ4_read32(p), tableType); } static void LZ4_putPositionOnHash( const BYTE *p, U32 h, void *tableBase, tableType_t const tableType, const BYTE *srcBase) { switch (tableType) { case byPtr: { const BYTE **hashTable = (const BYTE **)tableBase; hashTable[h] = p; return; } case byU32: { U32 *hashTable = (U32 *) tableBase; hashTable[h] = (U32)(p - srcBase); return; } case byU16: { U16 *hashTable = (U16 *) tableBase; hashTable[h] = (U16)(p - srcBase); return; } } } static FORCE_INLINE void LZ4_putPosition( const BYTE *p, void *tableBase, tableType_t tableType, const BYTE *srcBase) { U32 const h = LZ4_hashPosition(p, tableType); LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); } static const BYTE *LZ4_getPositionOnHash( U32 h, void *tableBase, tableType_t tableType, const BYTE *srcBase) { if (tableType == byPtr) { const BYTE **hashTable = (const BYTE **) tableBase; return hashTable[h]; } if (tableType == byU32) { const U32 * const hashTable = (U32 *) tableBase; return hashTable[h] + srcBase; } { /* default, to ensure a return */ const U16 * const hashTable = (U16 *) tableBase; return hashTable[h] + srcBase; } } static FORCE_INLINE const BYTE *LZ4_getPosition( const BYTE *p, void *tableBase, tableType_t tableType, const BYTE *srcBase) { U32 const h = LZ4_hashPosition(p, tableType); return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); } /* * LZ4_compress_generic() : * inlined, to ensure branches are decided at compilation time */ static FORCE_INLINE int LZ4_compress_generic( LZ4_stream_t_internal * const dictPtr, const char * const source, char * const dest, const int inputSize, const int maxOutputSize, const limitedOutput_directive outputLimited, const tableType_t tableType, const dict_directive dict, const dictIssue_directive dictIssue, const U32 acceleration) { const BYTE *ip = (const BYTE *) source; const BYTE *base; const BYTE *lowLimit; const BYTE * const lowRefLimit = ip - dictPtr->dictSize; const BYTE * const dictionary = dictPtr->dictionary; const BYTE * const dictEnd = dictionary + dictPtr->dictSize; const size_t dictDelta = dictEnd - (const BYTE *)source; const BYTE *anchor = (const BYTE *) source; const BYTE * const iend = ip + inputSize; const BYTE * const mflimit = iend - MFLIMIT; const BYTE * const matchlimit = iend - LASTLITERALS; BYTE *op = (BYTE *) dest; BYTE * const olimit = op + maxOutputSize; U32 forwardH; size_t refDelta = 0; /* Init conditions */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { /* Unsupported inputSize, too large (or negative) */ return 0; } switch (dict) { case noDict: default: base = (const BYTE *)source; lowLimit = (const BYTE *)source; break; case withPrefix64k: base = (const BYTE *)source - dictPtr->currentOffset; lowLimit = (const BYTE *)source - dictPtr->dictSize; break; case usingExtDict: base = (const BYTE *)source - dictPtr->currentOffset; lowLimit = (const BYTE *)source; break; } if ((tableType == byU16) && (inputSize >= LZ4_64Klimit)) { /* Size too large (not within 64K limit) */ return 0; } if (inputSize < LZ4_minLength) { /* Input 
too small, no compression (all literals) */ goto _last_literals; } /* First Byte */ LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ for ( ; ; ) { const BYTE *match; BYTE *token; /* Find a match */ { const BYTE *forwardIp = ip; unsigned int step = 1; unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER; do { U32 const h = forwardH; ip = forwardIp; forwardIp += step; step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); if (unlikely(forwardIp > mflimit)) goto _last_literals; match = LZ4_getPositionOnHash(h, dictPtr->hashTable, tableType, base); if (dict == usingExtDict) { if (match < (const BYTE *)source) { refDelta = dictDelta; lowLimit = dictionary; } else { refDelta = 0; lowLimit = (const BYTE *)source; } } forwardH = LZ4_hashPosition(forwardIp, tableType); LZ4_putPositionOnHash(ip, h, dictPtr->hashTable, tableType, base); } while (((dictIssue == dictSmall) ? (match < lowRefLimit) : 0) || ((tableType == byU16) ? 0 : (match + MAX_DISTANCE < ip)) || (LZ4_read32(match + refDelta) != LZ4_read32(ip))); } /* Catch up */ while (((ip > anchor) & (match + refDelta > lowLimit)) && (unlikely(ip[-1] == match[refDelta - 1]))) { ip--; match--; } /* Encode Literals */ { unsigned const int litLength = (unsigned int)(ip - anchor); token = op++; if ((outputLimited) && /* Check output buffer overflow */ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength / 255) > olimit))) return 0; if (litLength >= RUN_MASK) { int len = (int)litLength - RUN_MASK; *token = (RUN_MASK << ML_BITS); for (; len >= 255; len -= 255) *op++ = 255; *op++ = (BYTE)len; } else *token = (BYTE)(litLength << ML_BITS); /* Copy Literals */ LZ4_wildCopy(op, anchor, op + litLength); op += litLength; } _next_match: /* Encode Offset */ LZ4_writeLE16(op, (U16)(ip - match)); op += 2; /* Encode MatchLength */ { unsigned int matchCode; if ((dict == usingExtDict) && (lowLimit == dictionary)) { const BYTE *limit; match += refDelta; limit = ip + (dictEnd - match); if (limit > matchlimit) limit = matchlimit; matchCode = LZ4_count(ip + MINMATCH, match + MINMATCH, limit); ip += MINMATCH + matchCode; if (ip == limit) { unsigned const int more = LZ4_count(ip, (const BYTE *)source, matchlimit); matchCode += more; ip += more; } } else { matchCode = LZ4_count(ip + MINMATCH, match + MINMATCH, matchlimit); ip += MINMATCH + matchCode; } if (outputLimited && /* Check output buffer overflow */ (unlikely(op + (1 + LASTLITERALS) + (matchCode >> 8) > olimit))) return 0; if (matchCode >= ML_MASK) { *token += ML_MASK; matchCode -= ML_MASK; LZ4_write32(op, 0xFFFFFFFF); while (matchCode >= 4 * 255) { op += 4; LZ4_write32(op, 0xFFFFFFFF); matchCode -= 4 * 255; } op += matchCode / 255; *op++ = (BYTE)(matchCode % 255); } else *token += (BYTE)(matchCode); } anchor = ip; /* Test end of chunk */ if (ip > mflimit) break; /* Fill table */ LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base); /* Test next position */ match = LZ4_getPosition(ip, dictPtr->hashTable, tableType, base); if (dict == usingExtDict) { if (match < (const BYTE *)source) { refDelta = dictDelta; lowLimit = dictionary; } else { refDelta = 0; lowLimit = (const BYTE *)source; } } LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); if (((dictIssue == dictSmall) ? 
(match >= lowRefLimit) : 1) && (match + MAX_DISTANCE >= ip) && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) { token = op++; *token = 0; goto _next_match; } /* Prepare next loop */ forwardH = LZ4_hashPosition(++ip, tableType); } _last_literals: /* Encode Last Literals */ { size_t const lastRun = (size_t)(iend - anchor); if ((outputLimited) && /* Check output buffer overflow */ ((op - (BYTE *)dest) + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize)) return 0; if (lastRun >= RUN_MASK) { size_t accumulator = lastRun - RUN_MASK; *op++ = RUN_MASK << ML_BITS; for (; accumulator >= 255; accumulator -= 255) *op++ = 255; *op++ = (BYTE) accumulator; } else { *op++ = (BYTE)(lastRun << ML_BITS); } LZ4_memcpy(op, anchor, lastRun); op += lastRun; } /* End */ return (int) (((char *)op) - dest); } static int LZ4_compress_fast_extState( void *state, const char *source, char *dest, int inputSize, int maxOutputSize, int acceleration) { LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse; #if LZ4_ARCH64 const tableType_t tableType = byU32; #else const tableType_t tableType = byPtr; #endif LZ4_resetStream((LZ4_stream_t *)state); if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) { if (inputSize < LZ4_64Klimit) return LZ4_compress_generic(ctx, source, dest, inputSize, 0, noLimit, byU16, noDict, noDictIssue, acceleration); else return LZ4_compress_generic(ctx, source, dest, inputSize, 0, noLimit, tableType, noDict, noDictIssue, acceleration); } else { if (inputSize < LZ4_64Klimit) return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); else return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } int LZ4_compress_fast(const char *source, char *dest, int inputSize, int maxOutputSize, int acceleration, void *wrkmem) { return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, maxOutputSize, acceleration); } EXPORT_SYMBOL(LZ4_compress_fast); int LZ4_compress_default(const char *source, char *dest, int inputSize, int maxOutputSize, void *wrkmem) { return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem); } EXPORT_SYMBOL(LZ4_compress_default); /*-****************************** * *_destSize() variant ********************************/ static int LZ4_compress_destSize_generic( LZ4_stream_t_internal * const ctx, const char * const src, char * const dst, int * const srcSizePtr, const int targetDstSize, const tableType_t tableType) { const BYTE *ip = (const BYTE *) src; const BYTE *base = (const BYTE *) src; const BYTE *lowLimit = (const BYTE *) src; const BYTE *anchor = ip; const BYTE * const iend = ip + *srcSizePtr; const BYTE * const mflimit = iend - MFLIMIT; const BYTE * const matchlimit = iend - LASTLITERALS; BYTE *op = (BYTE *) dst; BYTE * const oend = op + targetDstSize; BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */; BYTE * const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); BYTE * const oMaxSeq = oMaxLit - 1 /* token */; U32 forwardH; /* Init conditions */ /* Impossible to store anything */ if (targetDstSize < 1) return 0; /* Unsupported input size, too large (or negative) */ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Size too large (not within 64K limit) */ if ((tableType == byU16) && (*srcSizePtr 
>= LZ4_64Klimit)) return 0; /* Input too small, no compression (all literals) */ if (*srcSizePtr < LZ4_minLength) goto _last_literals; /* First Byte */ *srcSizePtr = 0; LZ4_putPosition(ip, ctx->hashTable, tableType, base); ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ for ( ; ; ) { const BYTE *match; BYTE *token; /* Find a match */ { const BYTE *forwardIp = ip; unsigned int step = 1; unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER; do { U32 h = forwardH; ip = forwardIp; forwardIp += step; step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); if (unlikely(forwardIp > mflimit)) goto _last_literals; match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base); forwardH = LZ4_hashPosition(forwardIp, tableType); LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base); } while (((tableType == byU16) ? 0 : (match + MAX_DISTANCE < ip)) || (LZ4_read32(match) != LZ4_read32(ip))); } /* Catch up */ while ((ip > anchor) && (match > lowLimit) && (unlikely(ip[-1] == match[-1]))) { ip--; match--; } /* Encode Literal length */ { unsigned int litLength = (unsigned int)(ip - anchor); token = op++; if (op + ((litLength + 240) / 255) + litLength > oMaxLit) { /* Not enough space for a last match */ op--; goto _last_literals; } if (litLength >= RUN_MASK) { unsigned int len = litLength - RUN_MASK; *token = (RUN_MASK<<ML_BITS); for (; len >= 255; len -= 255) *op++ = 255; *op++ = (BYTE)len; } else *token = (BYTE)(litLength << ML_BITS); /* Copy Literals */ LZ4_wildCopy(op, anchor, op + litLength); op += litLength; } _next_match: /* Encode Offset */ LZ4_writeLE16(op, (U16)(ip - match)); op += 2; /* Encode MatchLength */ { size_t matchLength = LZ4_count(ip + MINMATCH, match + MINMATCH, matchlimit); if (op + ((matchLength + 240)/255) > oMaxMatch) { /* Match description too long : reduce it */ matchLength = (15 - 1) + (oMaxMatch - op) * 255; } ip += MINMATCH + matchLength; if (matchLength >= ML_MASK) { *token += ML_MASK; matchLength -= ML_MASK; while (matchLength >= 255) { matchLength -= 255; *op++ = 255; } *op++ = (BYTE)matchLength; } else *token += (BYTE)(matchLength); } anchor = ip; /* Test end of block */ if (ip > mflimit) break; if (op > oMaxSeq) break; /* Fill table */ LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base); /* Test next position */ match = LZ4_getPosition(ip, ctx->hashTable, tableType, base); LZ4_putPosition(ip, ctx->hashTable, tableType, base); if ((match + MAX_DISTANCE >= ip) && (LZ4_read32(match) == LZ4_read32(ip))) { token = op++; *token = 0; goto _next_match; } /* Prepare next loop */ forwardH = LZ4_hashPosition(++ip, tableType); } _last_literals: /* Encode Last Literals */ { size_t lastRunSize = (size_t)(iend - anchor); if (op + 1 /* token */ + ((lastRunSize + 240) / 255) /* litLength */ + lastRunSize /* literals */ > oend) { /* adapt lastRunSize to fill 'dst' */ lastRunSize = (oend - op) - 1; lastRunSize -= (lastRunSize + 240) / 255; } ip = anchor + lastRunSize; if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; *op++ = RUN_MASK << ML_BITS; for (; accumulator >= 255; accumulator -= 255) *op++ = 255; *op++ = (BYTE) accumulator; } else { *op++ = (BYTE)(lastRunSize<<ML_BITS); } LZ4_memcpy(op, anchor, lastRunSize); op += lastRunSize; } /* End */ *srcSizePtr = (int) (((const char *)ip) - src); return (int) (((char *)op) - dst); } static int LZ4_compress_destSize_extState( LZ4_stream_t *state, const char *src, char *dst, int *srcSizePtr, int targetDstSize) { #if LZ4_ARCH64 const tableType_t tableType = byU32; #else const tableType_t tableType = 
byPtr; #endif LZ4_resetStream(state); if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) { /* compression success is guaranteed */ return LZ4_compress_fast_extState( state, src, dst, *srcSizePtr, targetDstSize, 1); } else { if (*srcSizePtr < LZ4_64Klimit) return LZ4_compress_destSize_generic( &state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16); else return LZ4_compress_destSize_generic( &state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, tableType); } } int LZ4_compress_destSize( const char *src, char *dst, int *srcSizePtr, int targetDstSize, void *wrkmem) { return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, targetDstSize); } EXPORT_SYMBOL(LZ4_compress_destSize); /*-****************************** * Streaming functions ********************************/ void LZ4_resetStream(LZ4_stream_t *LZ4_stream) { memset(LZ4_stream, 0, sizeof(LZ4_stream_t)); } int LZ4_loadDict(LZ4_stream_t *LZ4_dict, const char *dictionary, int dictSize) { LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse; const BYTE *p = (const BYTE *)dictionary; const BYTE * const dictEnd = p + dictSize; const BYTE *base; if ((dict->initCheck) || (dict->currentOffset > 1 * GB)) { /* Uninitialized structure, or reuse overflow */ LZ4_resetStream(LZ4_dict); } if (dictSize < (int)HASH_UNIT) { dict->dictionary = NULL; dict->dictSize = 0; return 0; } if ((dictEnd - p) > 64 * KB) p = dictEnd - 64 * KB; dict->currentOffset += 64 * KB; base = p - dict->currentOffset; dict->dictionary = p; dict->dictSize = (U32)(dictEnd - p); dict->currentOffset += dict->dictSize; while (p <= dictEnd - HASH_UNIT) { LZ4_putPosition(p, dict->hashTable, byU32, base); p += 3; } return dict->dictSize; } EXPORT_SYMBOL(LZ4_loadDict); static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, const BYTE *src) { if ((LZ4_dict->currentOffset > 0x80000000) || ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) { /* address space overflow */ /* rescale hash table */ U32 const delta = LZ4_dict->currentOffset - 64 * KB; const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; int i; for (i = 0; i < LZ4_HASH_SIZE_U32; i++) { if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i] = 0; else LZ4_dict->hashTable[i] -= delta; } LZ4_dict->currentOffset = 64 * KB; if (LZ4_dict->dictSize > 64 * KB) LZ4_dict->dictSize = 64 * KB; LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; } } int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize) { LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse; const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize; if ((U32)dictSize > 64 * KB) { /* useless to define a dictionary > 64 * KB */ dictSize = 64 * KB; } if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; memmove(safeBuffer, previousDictEnd - dictSize, dictSize); dict->dictionary = (const BYTE *)safeBuffer; dict->dictSize = (U32)dictSize; return dictSize; } EXPORT_SYMBOL(LZ4_saveDict); int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source, char *dest, int inputSize, int maxOutputSize, int acceleration) { LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse; const BYTE * const dictEnd = streamPtr->dictionary + streamPtr->dictSize; const BYTE *smallest = (const BYTE *) source; if (streamPtr->initCheck) { /* Uninitialized structure detected */ return 0; } if ((streamPtr->dictSize > 0) && (smallest > dictEnd)) smallest = dictEnd; LZ4_renormDictT(streamPtr, smallest); if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; /* 
Check overlapping input/dictionary space */ { const BYTE *sourceEnd = (const BYTE *) source + inputSize; if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { streamPtr->dictSize = (U32)(dictEnd - sourceEnd); if (streamPtr->dictSize > 64 * KB) streamPtr->dictSize = 64 * KB; if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; streamPtr->dictionary = dictEnd - streamPtr->dictSize; } } /* prefix mode : source data follows dictionary */ if (dictEnd == (const BYTE *)source) { int result; if ((streamPtr->dictSize < 64 * KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic( streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); } else { result = LZ4_compress_generic( streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); } streamPtr->dictSize += (U32)inputSize; streamPtr->currentOffset += (U32)inputSize; return result; } /* external dictionary mode */ { int result; if ((streamPtr->dictSize < 64 * KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic( streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration); } else { result = LZ4_compress_generic( streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); } streamPtr->dictionary = (const BYTE *)source; streamPtr->dictSize = (U32)inputSize; streamPtr->currentOffset += (U32)inputSize; return result; } } EXPORT_SYMBOL(LZ4_compress_fast_continue); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4 compressor");
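LZ4_compress_default() and LZ4_compress_fast() both funnel into LZ4_compress_fast_extState(), which casts wrkmem to an LZ4_stream_t and resets it, so a caller only needs to supply a scratch buffer for that state plus a destination buffer. A minimal caller sketch, assuming the usual <linux/lz4.h> declarations (LZ4_MEM_COMPRESS for the workspace size; the example_* function name is illustrative):

#include <linux/errno.h>
#include <linux/lz4.h>
#include <linux/vmalloc.h>

/*
 * Sketch: compress src[0..src_len) into dst and return the compressed
 * length, or a negative errno. dst must have room for bound bytes.
 */
static int example_lz4_compress(const char *src, int src_len,
				char *dst, int bound)
{
	void *wrkmem = vmalloc(LZ4_MEM_COMPRESS);	/* scratch for LZ4_stream_t */
	int out_len;

	if (!wrkmem)
		return -ENOMEM;

	/* Returns 0 when dst is too small for the compressed data. */
	out_len = LZ4_compress_default(src, dst, src_len, bound, wrkmem);
	vfree(wrkmem);

	return out_len ? out_len : -E2BIG;
}

A caller would typically size dst with LZ4_compressBound(src_len), or tolerate the zero return when it deliberately passes a smaller buffer.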
/* SPDX-License-Identifier: GPL-2.0 */ /* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. */ #ifndef _LINUX_XSK_QUEUE_H #define _LINUX_XSK_QUEUE_H #include <linux/types.h> #include <linux/if_xdp.h> #include <net/xdp_sock.h> #include <net/xsk_buff_pool.h> #include "xsk.h" struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; /* Hinder the adjacent cache prefetcher to prefetch the consumer * pointer if the producer pointer is touched and vice versa. */ u32 pad1 ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; u32 pad2 ____cacheline_aligned_in_smp; u32 flags; u32 pad3 ____cacheline_aligned_in_smp; }; /* Used for the RX and TX queues for packets */ struct xdp_rxtx_ring { struct xdp_ring ptrs; struct xdp_desc desc[] ____cacheline_aligned_in_smp; }; /* Used for the fill and completion queues for buffers */ struct xdp_umem_ring { struct xdp_ring ptrs; u64 desc[] ____cacheline_aligned_in_smp; }; struct xsk_queue { u32 ring_mask; u32 nentries; u32 cached_prod; u32 cached_cons; struct xdp_ring *ring; u64 invalid_descs; u64 queue_empty_descs; size_t ring_vmalloc_size; }; struct parsed_desc { u32 mb; u32 valid; }; /* The structure of the shared state of the rings is a simple * circular buffer, as outlined in * Documentation/core-api/circular-buffers.rst. For the Rx and * completion ring, the kernel is the producer and user space is the * consumer. For the Tx and fill rings, the kernel is the consumer and * user space is the producer. * * producer consumer * * if (LOAD ->consumer) { (A) LOAD.acq ->producer (C) * STORE $data LOAD $data * STORE.rel ->producer (B) STORE.rel ->consumer (D) * } * * (A) pairs with (D), and (B) pairs with (C). * * Starting with (B), it protects the data from being written after * the producer pointer.
If this barrier was missing, the consumer * could observe the producer pointer being set and thus load the data * before the producer has written the new data. The consumer would in * this case load the old data. * * (C) protects the consumer from speculatively loading the data before * the producer pointer actually has been read. If we do not have this * barrier, some architectures could load old data as speculative loads * are not discarded as the CPU does not know there is a dependency * between ->producer and data. * * (A) is a control dependency that separates the load of ->consumer * from the stores of $data. In case ->consumer indicates there is no * room in the buffer to store $data we do not. The dependency will * order both of the stores after the loads. So no barrier is needed. * * (D) protects the load of the data to be observed to happen after the * store of the consumer pointer. If we did not have this memory * barrier, the producer could observe the consumer pointer being set * and overwrite the data with a new value before the consumer got the * chance to read the old value. The consumer would thus miss reading * the old entry and very likely read the new entry twice, once right * now and again after circling through the ring. */ /* The operations on the rings are the following: * * producer consumer * * RESERVE entries PEEK in the ring for entries * WRITE data into the ring READ data from the ring * SUBMIT entries RELEASE entries * * The producer reserves one or more entries in the ring. It can then * fill in these entries and finally submit them so that they can be * seen and read by the consumer. * * The consumer peeks into the ring to see if the producer has written * any new entries. If so, the consumer can then read these entries * and when it is done reading them release them back to the producer * so that the producer can use these slots to fill in new entries. * * The function names below reflect these operations. */ /* Functions that read and validate content from consumer rings. 
*/ static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; u32 idx = cached_cons & q->ring_mask; *addr = ring->desc[idx]; } static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) { if (q->cached_cons != q->cached_prod) { __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); return true; } return false; } static inline bool xp_unused_options_set(u32 options) { return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { u64 addr = desc->addr - pool->tx_metadata_len; u64 len = desc->len + pool->tx_metadata_len; u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; if (offset + len > pool->chunk_size) return false; if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) return false; return true; } static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; if (len > pool->chunk_size) return false; if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) return false; return true; } static inline bool xp_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) : xp_aligned_validate_desc(pool, desc); } static inline bool xskq_has_descs(struct xsk_queue *q) { return q->cached_cons != q->cached_prod; } static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, struct xsk_buff_pool *pool) { if (!xp_validate_desc(pool, d)) { q->invalid_descs++; return false; } return true; } static inline bool xskq_cons_read_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xsk_buff_pool *pool) { if (q->cached_cons != q->cached_prod) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx = q->cached_cons & q->ring_mask; *desc = ring->desc[idx]; return xskq_cons_is_valid_desc(q, desc, pool); } q->queue_empty_descs++; return false; } static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) { q->cached_cons += cnt; } static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool, struct xdp_desc *desc, struct parsed_desc *parsed) { parsed->valid = xskq_cons_is_valid_desc(q, desc, pool); parsed->mb = xp_mb_desc(desc); } static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, u32 max) { u32 cached_cons = q->cached_cons, nb_entries = 0; struct xdp_desc *descs = pool->tx_descs; u32 total_descs = 0, nr_frags = 0; /* track first entry, if stumble upon *any* invalid descriptor, rewind * current packet that consists of frags and stop the processing */ while (cached_cons != q->cached_prod && nb_entries < max) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx = cached_cons & q->ring_mask; struct parsed_desc parsed; descs[nb_entries] = ring->desc[idx]; cached_cons++; parse_desc(q, pool, &descs[nb_entries], &parsed); if (unlikely(!parsed.valid)) break; if (likely(!parsed.mb)) { total_descs += (nr_frags + 1); nr_frags = 0; } else { nr_frags++; if (nr_frags == pool->xdp_zc_max_segs) { nr_frags = 0; break; } } nb_entries++; } cached_cons -= nr_frags; /* 
Release valid plus any invalid entries */ xskq_cons_release_n(q, cached_cons - q->cached_cons); return total_descs; } /* Functions for consumers */ static inline void __xskq_cons_release(struct xsk_queue *q) { smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matches A */ } static inline void __xskq_cons_peek(struct xsk_queue *q) { /* Refresh the local pointer */ q->cached_prod = smp_load_acquire(&q->ring->producer); /* C, matches B */ } static inline void xskq_cons_get_entries(struct xsk_queue *q) { __xskq_cons_release(q); __xskq_cons_peek(q); } static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max) { u32 entries = q->cached_prod - q->cached_cons; if (entries >= max) return max; __xskq_cons_peek(q); entries = q->cached_prod - q->cached_cons; return entries >= max ? max : entries; } static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); return xskq_cons_read_addr_unchecked(q, addr); } static inline bool xskq_cons_peek_desc(struct xsk_queue *q, struct xdp_desc *desc, struct xsk_buff_pool *pool) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); return xskq_cons_read_desc(q, desc, pool); } /* To improve performance in the xskq_cons_release functions, only update local state here. * Reflect this to global state when we get new entries from the ring in * xskq_cons_get_entries() and whenever Rx or Tx processing is completed in the NAPI loop. */ static inline void xskq_cons_release(struct xsk_queue *q) { q->cached_cons++; } static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt) { q->cached_cons -= cnt; } static inline u32 xskq_cons_present_entries(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); } /* Functions for producers */ static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max) { u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); if (free_entries >= max) return max; /* Refresh the local tail pointer */ q->cached_cons = READ_ONCE(q->ring->consumer); free_entries = q->nentries - (q->cached_prod - q->cached_cons); return free_entries >= max ? max : free_entries; } static inline bool xskq_prod_is_full(struct xsk_queue *q) { return xskq_prod_nb_free(q, 1) ?
false : true; } static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt) { q->cached_prod -= cnt; } static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ q->cached_prod++; return 0; } static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ ring->desc[q->cached_prod++ & q->ring_mask] = addr; return 0; } static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, u32 nb_entries) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; u32 i, cached_prod; /* A, matches D */ cached_prod = q->cached_prod; for (i = 0; i < nb_entries; i++) ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr; q->cached_prod = cached_prod; } static inline int xskq_prod_reserve_desc(struct xsk_queue *q, u64 addr, u32 len, u32 flags) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; u32 idx; if (xskq_prod_is_full(q)) return -ENOBUFS; /* A, matches D */ idx = q->cached_prod++ & q->ring_mask; ring->desc[idx].addr = addr; ring->desc[idx].len = len; ring->desc[idx].options = flags; return 0; } static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) { smp_store_release(&q->ring->producer, idx); /* B, matches C */ } static inline void xskq_prod_submit(struct xsk_queue *q) { __xskq_prod_submit(q, q->cached_prod); } static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) { __xskq_prod_submit(q, q->ring->producer + nb_entries); } static inline bool xskq_prod_is_empty(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); } /* For both producers and consumers */ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) { return q ? q->invalid_descs : 0; } static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q) { return q ? q->queue_empty_descs : 0; } struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); #endif /* _LINUX_XSK_QUEUE_H */
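The RESERVE/WRITE/SUBMIT and PEEK/READ/RELEASE pairing described in the comment block above maps directly onto the inline helpers in this header. A hedged sketch of a kernel-side user moving one address from a fill queue to a completion queue (the fq/cq naming and the surrounding driver context are assumptions, not taken from this header):

/*
 * Sketch: consume one address from the fill queue (user space is the
 * producer there) and publish it on the completion queue (the kernel
 * is the producer there). Error handling is intentionally minimal.
 */
static bool example_recycle_one(struct xsk_queue *fq, struct xsk_queue *cq)
{
	u64 addr;

	/* PEEK: refreshes cached_prod with an acquire load if needed. */
	if (!xskq_cons_peek_addr_unchecked(fq, &addr))
		return false;		/* fill queue empty */

	/* RESERVE: returns -ENOSPC when the completion ring is full. */
	if (xskq_prod_reserve_addr(cq, addr))
		return false;

	/*
	 * RELEASE: only bumps the local cached consumer pointer here;
	 * __xskq_cons_release() publishes it later with a release store.
	 */
	xskq_cons_release(fq);

	/*
	 * SUBMIT: release-store the producer pointer so the consumer
	 * (user space) can observe the new entry.
	 */
	xskq_prod_submit(cq);
	return true;
}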
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* user-type.h: User-defined key type * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_USER_TYPE_H #define _KEYS_USER_TYPE_H #include <linux/key.h> #include <linux/rcupdate.h> #ifdef CONFIG_KEYS /*****************************************************************************/ /* * the payload for a key of type "user" or "logon" * - once filled in and attached to a key: * - the payload struct is invariant and may not be changed, only replaced * - the payload must be read with RCU procedures or with the key semaphore * held * - the payload may only be replaced with the key semaphore write-locked * - the key's data length is the size of the actual data, not including the * payload wrapper */ struct user_key_payload { struct rcu_head rcu; /* RCU destructor */ unsigned short datalen; /* length of this data */ char data[] __aligned(__alignof__(u64)); /* actual data */ }; extern struct key_type key_type_user; extern struct key_type key_type_logon; struct key_preparsed_payload; extern int user_preparse(struct key_preparsed_payload *prep); extern void user_free_preparse(struct key_preparsed_payload *prep); extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char *buffer, size_t buflen); static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { return (struct user_key_payload *)dereference_key_rcu(key); } static inline struct user_key_payload *user_key_payload_locked(const struct key *key) { return (struct user_key_payload *)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ #endif /* _KEYS_USER_TYPE_H */
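The two accessors at the end of the header correspond to the two locking rules spelled out in the comment: RCU for readers, the key semaphore for code that also serializes against payload replacement. A hedged sketch of the RCU-side access pattern (the example_* function, its error handling, and the copy-out policy are illustrative, not taken from this header):

#include <keys/user-type.h>
#include <linux/errno.h>
#include <linux/key.h>
#include <linux/rcupdate.h>
#include <linux/string.h>

/* Sketch: copy out at most buflen bytes of a user key's payload. */
static long example_read_user_key(const struct key *key, void *buf, size_t buflen)
{
	const struct user_key_payload *payload;
	long len;

	/*
	 * The payload is only stable inside the RCU read-side critical
	 * section (or with key->sem held), so copy what is needed here.
	 */
	rcu_read_lock();
	payload = user_key_payload_rcu(key);
	if (!payload) {
		len = -ENOKEY;		/* e.g. negatively instantiated */
	} else {
		len = payload->datalen < buflen ? payload->datalen : buflen;
		memcpy(buf, payload->data, len);
	}
	rcu_read_unlock();

	return len;
}

Code that already holds key->sem would use user_key_payload_locked() instead of the RCU accessor.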
// SPDX-License-Identifier: 0BSD /* * LZMA2 decoder * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> */ #include "xz_private.h" #include "xz_lzma2.h" /* * Range decoder initialization eats the first five bytes of each LZMA chunk. */ #define RC_INIT_BYTES 5 /* * Minimum number of usable input buffer to safely decode one LZMA symbol. * The worst case is that we decode 22 bits using probabilities and 26 * direct bits. This may decode at maximum of 20 bytes of input. However, * lzma_main() does an extra normalization before returning, thus we * need to put 21 here. */ #define LZMA_IN_REQUIRED 21 /* * Dictionary (history buffer) * * These are always true: * start <= pos <= full <= end * pos <= limit <= end * * In multi-call mode, also these are true: * end == size * size <= size_max * allocated <= size * * Most of these variables are size_t to support single-call mode, * in which the dictionary variables address the actual output * buffer directly.
*/ struct dictionary { /* Beginning of the history buffer */ uint8_t *buf; /* Old position in buf (before decoding more data) */ size_t start; /* Position in buf */ size_t pos; /* * How full dictionary is. This is used to detect corrupt input that * would read beyond the beginning of the uncompressed stream. */ size_t full; /* Write limit; we don't write to buf[limit] or later bytes. */ size_t limit; /* * End of the dictionary buffer. In multi-call mode, this is * the same as the dictionary size. In single-call mode, this * indicates the size of the output buffer. */ size_t end; /* * Size of the dictionary as specified in Block Header. This is used * together with "full" to detect corrupt input that would make us * read beyond the beginning of the uncompressed stream. */ uint32_t size; /* * Maximum allowed dictionary size in multi-call mode. * This is ignored in single-call mode. */ uint32_t size_max; /* * Amount of memory currently allocated for the dictionary. * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, * size_max is always the same as the allocated size.) */ uint32_t allocated; /* Operation mode */ enum xz_mode mode; }; /* Range decoder */ struct rc_dec { uint32_t range; uint32_t code; /* * Number of initializing bytes remaining to be read * by rc_read_init(). */ uint32_t init_bytes_left; /* * Buffer from which we read our input. It can be either * temp.buf or the caller-provided input buffer. */ const uint8_t *in; size_t in_pos; size_t in_limit; }; /* Probabilities for a length decoder. */ struct lzma_len_dec { /* Probability of match length being at least 10 */ uint16_t choice; /* Probability of match length being at least 18 */ uint16_t choice2; /* Probabilities for match lengths 2-9 */ uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; /* Probabilities for match lengths 10-17 */ uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; /* Probabilities for match lengths 18-273 */ uint16_t high[LEN_HIGH_SYMBOLS]; }; struct lzma_dec { /* Distances of latest four matches */ uint32_t rep0; uint32_t rep1; uint32_t rep2; uint32_t rep3; /* Types of the most recently seen LZMA symbols */ enum lzma_state state; /* * Length of a match. This is updated so that dict_repeat can * be called again to finish repeating the whole match. */ uint32_t len; /* * LZMA properties or related bit masks (number of literal * context bits, a mask derived from the number of literal * position bits, and a mask derived from the number * position bits) */ uint32_t lc; uint32_t literal_pos_mask; /* (1 << lp) - 1 */ uint32_t pos_mask; /* (1 << pb) - 1 */ /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ uint16_t is_match[STATES][POS_STATES_MAX]; /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ uint16_t is_rep[STATES]; /* * If 0, distance of a repeated match is rep0. * Otherwise check is_rep1. */ uint16_t is_rep0[STATES]; /* * If 0, distance of a repeated match is rep1. * Otherwise check is_rep2. */ uint16_t is_rep1[STATES]; /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ uint16_t is_rep2[STATES]; /* * If 1, the repeated match has length of one byte. Otherwise * the length is decoded from rep_len_decoder. */ uint16_t is_rep0_long[STATES][POS_STATES_MAX]; /* * Probability tree for the highest two bits of the match * distance. There is a separate probability tree for match * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. 
*/ uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; /* * Probability trees for additional bits for match distance * when the distance is in the range [4, 127]. */ uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; /* * Probability tree for the lowest four bits of a match * distance that is equal to or greater than 128. */ uint16_t dist_align[ALIGN_SIZE]; /* Length of a normal match */ struct lzma_len_dec match_len_dec; /* Length of a repeated match */ struct lzma_len_dec rep_len_dec; /* Probabilities of literals */ uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; }; struct lzma2_dec { /* Position in xz_dec_lzma2_run(). */ enum lzma2_seq { SEQ_CONTROL, SEQ_UNCOMPRESSED_1, SEQ_UNCOMPRESSED_2, SEQ_COMPRESSED_0, SEQ_COMPRESSED_1, SEQ_PROPERTIES, SEQ_LZMA_PREPARE, SEQ_LZMA_RUN, SEQ_COPY } sequence; /* Next position after decoding the compressed size of the chunk. */ enum lzma2_seq next_sequence; /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ uint32_t uncompressed; /* * Compressed size of LZMA chunk or compressed/uncompressed * size of uncompressed chunk (64 KiB at maximum) */ uint32_t compressed; /* * True if dictionary reset is needed. This is false before * the first chunk (LZMA or uncompressed). */ bool need_dict_reset; /* * True if new LZMA properties are needed. This is false * before the first LZMA chunk. */ bool need_props; #ifdef XZ_DEC_MICROLZMA bool pedantic_microlzma; #endif }; struct xz_dec_lzma2 { /* * The order below is important on x86 to reduce code size and * it shouldn't hurt on other platforms. Everything up to and * including lzma.pos_mask are in the first 128 bytes on x86-32, * which allows using smaller instructions to access those * variables. On x86-64, fewer variables fit into the first 128 * bytes, but this is still the best order without sacrificing * the readability by splitting the structures. */ struct rc_dec rc; struct dictionary dict; struct lzma2_dec lzma2; struct lzma_dec lzma; /* * Temporary buffer which holds small number of input bytes between * decoder calls. See lzma2_lzma() for details. */ struct { uint32_t size; uint8_t buf[3 * LZMA_IN_REQUIRED]; } temp; }; /************** * Dictionary * **************/ /* * Reset the dictionary state. When in single-call mode, set up the beginning * of the dictionary to point to the actual output buffer. */ static void dict_reset(struct dictionary *dict, struct xz_buf *b) { if (DEC_IS_SINGLE(dict->mode)) { dict->buf = b->out + b->out_pos; dict->end = b->out_size - b->out_pos; } dict->start = 0; dict->pos = 0; dict->limit = 0; dict->full = 0; } /* Set dictionary write limit */ static void dict_limit(struct dictionary *dict, size_t out_max) { if (dict->end - dict->pos <= out_max) dict->limit = dict->end; else dict->limit = dict->pos + out_max; } /* Return true if at least one byte can be written into the dictionary. */ static inline bool dict_has_space(const struct dictionary *dict) { return dict->pos < dict->limit; } /* * Get a byte from the dictionary at the given distance. The distance is * assumed to be valid, or as a special case, zero when the dictionary is * still empty. This special case is needed for single-call decoding to * avoid writing a '\0' to the end of the destination buffer. */ static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist) { size_t offset = dict->pos - dist - 1; if (dist >= dict->pos) offset += dict->end; return dict->full > 0 ? dict->buf[offset] : 0; } /* * Put one byte into the dictionary. It is assumed that there is space for it.
*/ static inline void dict_put(struct dictionary *dict, uint8_t byte) { dict->buf[dict->pos++] = byte; if (dict->full < dict->pos) dict->full = dict->pos; } /* * Repeat given number of bytes from the given distance. If the distance is * invalid, false is returned. On success, true is returned and *len is * updated to indicate how many bytes were left to be repeated. */ static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist) { size_t back; uint32_t left; if (dist >= dict->full || dist >= dict->size) return false; left = min_t(size_t, dict->limit - dict->pos, *len); *len -= left; back = dict->pos - dist - 1; if (dist >= dict->pos) back += dict->end; do { dict->buf[dict->pos++] = dict->buf[back++]; if (back == dict->end) back = 0; } while (--left > 0); if (dict->full < dict->pos) dict->full = dict->pos; return true; } /* Copy uncompressed data as is from input to dictionary and output buffers. */ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, uint32_t *left) { size_t copy_size; while (*left > 0 && b->in_pos < b->in_size && b->out_pos < b->out_size) { copy_size = min(b->in_size - b->in_pos, b->out_size - b->out_pos); if (copy_size > dict->end - dict->pos) copy_size = dict->end - dict->pos; if (copy_size > *left) copy_size = *left; *left -= copy_size; /* * If doing in-place decompression in single-call mode and the * uncompressed size of the file is larger than the caller * thought (i.e. it is invalid input!), the buffers below may * overlap and cause undefined behavior with memcpy(). * With valid inputs memcpy() would be fine here. */ memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size); dict->pos += copy_size; if (dict->full < dict->pos) dict->full = dict->pos; if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; /* * Like above but for multi-call mode: use memmove() * to avoid undefined behavior with invalid input. */ memmove(b->out + b->out_pos, b->in + b->in_pos, copy_size); } dict->start = dict->pos; b->out_pos += copy_size; b->in_pos += copy_size; } } #ifdef XZ_DEC_MICROLZMA # define DICT_FLUSH_SUPPORTS_SKIPPING true #else # define DICT_FLUSH_SUPPORTS_SKIPPING false #endif /* * Flush pending data from dictionary to b->out. It is assumed that there is * enough space in b->out. This is guaranteed because caller uses dict_limit() * before decoding data into the dictionary. */ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) { size_t copy_size = dict->pos - dict->start; if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; /* * These buffers cannot overlap even if doing in-place * decompression because in multi-call mode dict->buf * has been allocated by us in this file; it's not * provided by the caller like in single-call mode. * * With MicroLZMA, b->out can be NULL to skip bytes that * the caller doesn't need. This cannot be done with XZ * because it would break BCJ filters. */ if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL) memcpy(b->out + b->out_pos, dict->buf + dict->start, copy_size); } dict->start = dict->pos; b->out_pos += copy_size; return copy_size; } /***************** * Range decoder * *****************/ /* Reset the range decoder. */ static void rc_reset(struct rc_dec *rc) { rc->range = (uint32_t)-1; rc->code = 0; rc->init_bytes_left = RC_INIT_BYTES; } /* * Read the first five initial bytes into rc->code if they haven't been * read already. (Yes, the first byte gets completely ignored.) 
*/ static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b) { while (rc->init_bytes_left > 0) { if (b->in_pos == b->in_size) return false; rc->code = (rc->code << 8) + b->in[b->in_pos++]; --rc->init_bytes_left; } return true; } /* Return true if there may not be enough input for the next decoding loop. */ static inline bool rc_limit_exceeded(const struct rc_dec *rc) { return rc->in_pos > rc->in_limit; } /* * Return true if it is possible (from point of view of range decoder) that * we have reached the end of the LZMA chunk. */ static inline bool rc_is_finished(const struct rc_dec *rc) { return rc->code == 0; } /* Read the next input byte if needed. */ static __always_inline void rc_normalize(struct rc_dec *rc) { if (rc->range < RC_TOP_VALUE) { rc->range <<= RC_SHIFT_BITS; rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; } } /* * Decode one bit. In some versions, this function has been split in three * functions so that the compiler is supposed to be able to more easily avoid * an extra branch. In this particular version of the LZMA decoder, this * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 * on x86). Using a non-split version results in nicer looking code too. * * NOTE: This must return an int. Do not make it return a bool or the speed * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) */ static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob) { uint32_t bound; int bit; rc_normalize(rc); bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; if (rc->code < bound) { rc->range = bound; *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; bit = 0; } else { rc->range -= bound; rc->code -= bound; *prob -= *prob >> RC_MOVE_BITS; bit = 1; } return bit; } /* Decode a bittree starting from the most significant bit. */ static __always_inline uint32_t rc_bittree(struct rc_dec *rc, uint16_t *probs, uint32_t limit) { uint32_t symbol = 1; do { if (rc_bit(rc, &probs[symbol])) symbol = (symbol << 1) + 1; else symbol <<= 1; } while (symbol < limit); return symbol; } /* Decode a bittree starting from the least significant bit. */ static __always_inline void rc_bittree_reverse(struct rc_dec *rc, uint16_t *probs, uint32_t *dest, uint32_t limit) { uint32_t symbol = 1; uint32_t i = 0; do { if (rc_bit(rc, &probs[symbol])) { symbol = (symbol << 1) + 1; *dest += 1 << i; } else { symbol <<= 1; } } while (++i < limit); } /* Decode direct bits (fixed fifty-fifty probability) */ static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit) { uint32_t mask; do { rc_normalize(rc); rc->range >>= 1; rc->code -= rc->range; mask = (uint32_t)0 - (rc->code >> 31); rc->code += rc->range & mask; *dest = (*dest << 1) + (mask + 1); } while (--limit > 0); } /******** * LZMA * ********/ /* Get pointer to literal coder probability array. 
*/ static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s) { uint32_t prev_byte = dict_get(&s->dict, 0); uint32_t low = prev_byte >> (8 - s->lzma.lc); uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; return s->lzma.literal[low + high]; } /* Decode a literal (one 8-bit byte) */ static void lzma_literal(struct xz_dec_lzma2 *s) { uint16_t *probs; uint32_t symbol; uint32_t match_byte; uint32_t match_bit; uint32_t offset; uint32_t i; probs = lzma_literal_probs(s); if (lzma_state_is_literal(s->lzma.state)) { symbol = rc_bittree(&s->rc, probs, 0x100); } else { symbol = 1; match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; offset = 0x100; do { match_bit = match_byte & offset; match_byte <<= 1; i = offset + match_bit + symbol; if (rc_bit(&s->rc, &probs[i])) { symbol = (symbol << 1) + 1; offset &= match_bit; } else { symbol <<= 1; offset &= ~match_bit; } } while (symbol < 0x100); } dict_put(&s->dict, (uint8_t)symbol); lzma_state_literal(&s->lzma.state); } /* Decode the length of the match into s->lzma.len. */ static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, uint32_t pos_state) { uint16_t *probs; uint32_t limit; if (!rc_bit(&s->rc, &l->choice)) { probs = l->low[pos_state]; limit = LEN_LOW_SYMBOLS; s->lzma.len = MATCH_LEN_MIN; } else { if (!rc_bit(&s->rc, &l->choice2)) { probs = l->mid[pos_state]; limit = LEN_MID_SYMBOLS; s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; } else { probs = l->high; limit = LEN_HIGH_SYMBOLS; s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; } } s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; } /* Decode a match. The distance will be stored in s->lzma.rep0. */ static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) { uint16_t *probs; uint32_t dist_slot; uint32_t limit; lzma_state_match(&s->lzma.state); s->lzma.rep3 = s->lzma.rep2; s->lzma.rep2 = s->lzma.rep1; s->lzma.rep1 = s->lzma.rep0; lzma_len(s, &s->lzma.match_len_dec, pos_state); probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; if (dist_slot < DIST_MODEL_START) { s->lzma.rep0 = dist_slot; } else { limit = (dist_slot >> 1) - 1; s->lzma.rep0 = 2 + (dist_slot & 1); if (dist_slot < DIST_MODEL_END) { s->lzma.rep0 <<= limit; probs = s->lzma.dist_special + s->lzma.rep0 - dist_slot - 1; rc_bittree_reverse(&s->rc, probs, &s->lzma.rep0, limit); } else { rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); s->lzma.rep0 <<= ALIGN_BITS; rc_bittree_reverse(&s->rc, s->lzma.dist_align, &s->lzma.rep0, ALIGN_BITS); } } } /* * Decode a repeated match. The distance is one of the four most recently * seen matches. The distance will be stored in s->lzma.rep0. 
*/ static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) { uint32_t tmp; if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ s->lzma.state][pos_state])) { lzma_state_short_rep(&s->lzma.state); s->lzma.len = 1; return; } } else { if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { tmp = s->lzma.rep1; } else { if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { tmp = s->lzma.rep2; } else { tmp = s->lzma.rep3; s->lzma.rep3 = s->lzma.rep2; } s->lzma.rep2 = s->lzma.rep1; } s->lzma.rep1 = s->lzma.rep0; s->lzma.rep0 = tmp; } lzma_state_long_rep(&s->lzma.state); lzma_len(s, &s->lzma.rep_len_dec, pos_state); } /* LZMA decoder core */ static bool lzma_main(struct xz_dec_lzma2 *s) { uint32_t pos_state; /* * If the dictionary was reached during the previous call, try to * finish the possibly pending repeat in the dictionary. */ if (dict_has_space(&s->dict) && s->lzma.len > 0) dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); /* * Decode more LZMA symbols. One iteration may consume up to * LZMA_IN_REQUIRED - 1 bytes. */ while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { pos_state = s->dict.pos & s->lzma.pos_mask; if (!rc_bit(&s->rc, &s->lzma.is_match[ s->lzma.state][pos_state])) { lzma_literal(s); } else { if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) lzma_rep_match(s, pos_state); else lzma_match(s, pos_state); if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) return false; } } /* * Having the range decoder always normalized when we are outside * this function makes it easier to correctly handle end of the chunk. */ rc_normalize(&s->rc); return true; } /* * Reset the LZMA decoder and range decoder state. Dictionary is not reset * here, because LZMA state may be reset without resetting the dictionary. */ static void lzma_reset(struct xz_dec_lzma2 *s) { uint16_t *probs; size_t i; s->lzma.state = STATE_LIT_LIT; s->lzma.rep0 = 0; s->lzma.rep1 = 0; s->lzma.rep2 = 0; s->lzma.rep3 = 0; s->lzma.len = 0; /* * All probabilities are initialized to the same value. This hack * makes the code smaller by avoiding a separate loop for each * probability array. * * This could be optimized so that only that part of literal * probabilities that are actually required. In the common case * we would write 12 KiB less. */ probs = s->lzma.is_match[0]; for (i = 0; i < PROBS_TOTAL; ++i) probs[i] = RC_BIT_MODEL_TOTAL / 2; rc_reset(&s->rc); } /* * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks * from the decoded lp and pb values. On success, the LZMA decoder state is * reset and true is returned. */ static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props) { if (props > (4 * 5 + 4) * 9 + 8) return false; s->lzma.pos_mask = 0; while (props >= 9 * 5) { props -= 9 * 5; ++s->lzma.pos_mask; } s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; s->lzma.literal_pos_mask = 0; while (props >= 9) { props -= 9; ++s->lzma.literal_pos_mask; } s->lzma.lc = props; if (s->lzma.lc + s->lzma.literal_pos_mask > 4) return false; s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; lzma_reset(s); return true; } /********* * LZMA2 * *********/ /* * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This * wrapper function takes care of making the LZMA decoder's assumption safe. 
* * As long as there is plenty of input left to be decoded in the current LZMA * chunk, we decode directly from the caller-supplied input buffer until * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into * s->temp.buf, which (hopefully) gets filled on the next call to this * function. We decode a few bytes from the temporary buffer so that we can * continue decoding from the caller-supplied input buffer again. */ static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) { size_t in_avail; uint32_t tmp; in_avail = b->in_size - b->in_pos; if (s->temp.size > 0 || s->lzma2.compressed == 0) { tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; if (tmp > s->lzma2.compressed - s->temp.size) tmp = s->lzma2.compressed - s->temp.size; if (tmp > in_avail) tmp = in_avail; memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); if (s->temp.size + tmp == s->lzma2.compressed) { memzero(s->temp.buf + s->temp.size + tmp, sizeof(s->temp.buf) - s->temp.size - tmp); s->rc.in_limit = s->temp.size + tmp; } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { s->temp.size += tmp; b->in_pos += tmp; return true; } else { s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; } s->rc.in = s->temp.buf; s->rc.in_pos = 0; if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) return false; s->lzma2.compressed -= s->rc.in_pos; if (s->rc.in_pos < s->temp.size) { s->temp.size -= s->rc.in_pos; memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, s->temp.size); return true; } b->in_pos += s->rc.in_pos - s->temp.size; s->temp.size = 0; } in_avail = b->in_size - b->in_pos; if (in_avail >= LZMA_IN_REQUIRED) { s->rc.in = b->in; s->rc.in_pos = b->in_pos; if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) s->rc.in_limit = b->in_pos + s->lzma2.compressed; else s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; if (!lzma_main(s)) return false; in_avail = s->rc.in_pos - b->in_pos; if (in_avail > s->lzma2.compressed) return false; s->lzma2.compressed -= in_avail; b->in_pos = s->rc.in_pos; } in_avail = b->in_size - b->in_pos; if (in_avail < LZMA_IN_REQUIRED) { if (in_avail > s->lzma2.compressed) in_avail = s->lzma2.compressed; memcpy(s->temp.buf, b->in + b->in_pos, in_avail); s->temp.size = in_avail; b->in_pos += in_avail; } return true; } /* * Take care of the LZMA2 control layer, and forward the job of actual LZMA * decoding or copying of uncompressed chunks to other functions. */ enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b) { uint32_t tmp; while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { switch (s->lzma2.sequence) { case SEQ_CONTROL: /* * LZMA2 control byte * * Exact values: * 0x00 End marker * 0x01 Dictionary reset followed by * an uncompressed chunk * 0x02 Uncompressed chunk (no dictionary reset) * * Highest three bits (s->control & 0xE0): * 0xE0 Dictionary reset, new properties and state * reset, followed by LZMA compressed chunk * 0xC0 New properties and state reset, followed * by LZMA compressed chunk (no dictionary * reset) * 0xA0 State reset using old properties, * followed by LZMA compressed chunk (no * dictionary reset) * 0x80 LZMA chunk (no dictionary or state reset) * * For LZMA compressed chunks, the lowest five bits * (s->control & 1F) are the highest bits of the * uncompressed size (bits 16-20). * * A new LZMA2 stream must begin with a dictionary * reset. The first LZMA chunk must set new * properties and reset the LZMA state. * * Values that don't match anything described above * are invalid and we return XZ_DATA_ERROR. 
*/ tmp = b->in[b->in_pos++]; if (tmp == 0x00) return XZ_STREAM_END; if (tmp >= 0xE0 || tmp == 0x01) { s->lzma2.need_props = true; s->lzma2.need_dict_reset = false; dict_reset(&s->dict, b); } else if (s->lzma2.need_dict_reset) { return XZ_DATA_ERROR; } if (tmp >= 0x80) { s->lzma2.uncompressed = (tmp & 0x1F) << 16; s->lzma2.sequence = SEQ_UNCOMPRESSED_1; if (tmp >= 0xC0) { /* * When there are new properties, * state reset is done at * SEQ_PROPERTIES. */ s->lzma2.need_props = false; s->lzma2.next_sequence = SEQ_PROPERTIES; } else if (s->lzma2.need_props) { return XZ_DATA_ERROR; } else { s->lzma2.next_sequence = SEQ_LZMA_PREPARE; if (tmp >= 0xA0) lzma_reset(s); } } else { if (tmp > 0x02) return XZ_DATA_ERROR; s->lzma2.sequence = SEQ_COMPRESSED_0; s->lzma2.next_sequence = SEQ_COPY; } break; case SEQ_UNCOMPRESSED_1: s->lzma2.uncompressed += (uint32_t)b->in[b->in_pos++] << 8; s->lzma2.sequence = SEQ_UNCOMPRESSED_2; break; case SEQ_UNCOMPRESSED_2: s->lzma2.uncompressed += (uint32_t)b->in[b->in_pos++] + 1; s->lzma2.sequence = SEQ_COMPRESSED_0; break; case SEQ_COMPRESSED_0: s->lzma2.compressed = (uint32_t)b->in[b->in_pos++] << 8; s->lzma2.sequence = SEQ_COMPRESSED_1; break; case SEQ_COMPRESSED_1: s->lzma2.compressed += (uint32_t)b->in[b->in_pos++] + 1; s->lzma2.sequence = s->lzma2.next_sequence; break; case SEQ_PROPERTIES: if (!lzma_props(s, b->in[b->in_pos++])) return XZ_DATA_ERROR; s->lzma2.sequence = SEQ_LZMA_PREPARE; fallthrough; case SEQ_LZMA_PREPARE: if (s->lzma2.compressed < RC_INIT_BYTES) return XZ_DATA_ERROR; if (!rc_read_init(&s->rc, b)) return XZ_OK; s->lzma2.compressed -= RC_INIT_BYTES; s->lzma2.sequence = SEQ_LZMA_RUN; fallthrough; case SEQ_LZMA_RUN: /* * Set dictionary limit to indicate how much we want * to be encoded at maximum. Decode new data into the * dictionary. Flush the new data from dictionary to * b->out. Check if we finished decoding this chunk. * In case the dictionary got full but we didn't fill * the output buffer yet, we may run this loop * multiple times without changing s->lzma2.sequence. */ dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos, s->lzma2.uncompressed)); if (!lzma2_lzma(s, b)) return XZ_DATA_ERROR; s->lzma2.uncompressed -= dict_flush(&s->dict, b); if (s->lzma2.uncompressed == 0) { if (s->lzma2.compressed > 0 || s->lzma.len > 0 || !rc_is_finished(&s->rc)) return XZ_DATA_ERROR; rc_reset(&s->rc); s->lzma2.sequence = SEQ_CONTROL; } else if (b->out_pos == b->out_size || (b->in_pos == b->in_size && s->temp.size < s->lzma2.compressed)) { return XZ_OK; } break; case SEQ_COPY: dict_uncompressed(&s->dict, b, &s->lzma2.compressed); if (s->lzma2.compressed > 0) return XZ_OK; s->lzma2.sequence = SEQ_CONTROL; break; } } return XZ_OK; } struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, uint32_t dict_max) { struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; s->dict.mode = mode; s->dict.size_max = dict_max; if (DEC_IS_PREALLOC(mode)) { s->dict.buf = vmalloc(dict_max); if (s->dict.buf == NULL) { kfree(s); return NULL; } } else if (DEC_IS_DYNALLOC(mode)) { s->dict.buf = NULL; s->dict.allocated = 0; } return s; } enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) { /* This limits dictionary size to 3 GiB to keep parsing simpler. 
*/ if (props > 39) return XZ_OPTIONS_ERROR; s->dict.size = 2 + (props & 1); s->dict.size <<= (props >> 1) + 11; if (DEC_IS_MULTI(s->dict.mode)) { if (s->dict.size > s->dict.size_max) return XZ_MEMLIMIT_ERROR; s->dict.end = s->dict.size; if (DEC_IS_DYNALLOC(s->dict.mode)) { if (s->dict.allocated < s->dict.size) { s->dict.allocated = s->dict.size; vfree(s->dict.buf); s->dict.buf = vmalloc(s->dict.size); if (s->dict.buf == NULL) { s->dict.allocated = 0; return XZ_MEM_ERROR; } } } } s->lzma2.sequence = SEQ_CONTROL; s->lzma2.need_dict_reset = true; s->temp.size = 0; return XZ_OK; } void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { if (DEC_IS_MULTI(s->dict.mode)) vfree(s->dict.buf); kfree(s); } #ifdef XZ_DEC_MICROLZMA /* This is a wrapper struct to have a nice struct name in the public API. */ struct xz_dec_microlzma { struct xz_dec_lzma2 s; }; enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr, struct xz_buf *b) { struct xz_dec_lzma2 *s = &s_ptr->s; /* * sequence is SEQ_PROPERTIES before the first input byte, * SEQ_LZMA_PREPARE until a total of five bytes have been read, * and SEQ_LZMA_RUN for the rest of the input stream. */ if (s->lzma2.sequence != SEQ_LZMA_RUN) { if (s->lzma2.sequence == SEQ_PROPERTIES) { /* One byte is needed for the props. */ if (b->in_pos >= b->in_size) return XZ_OK; /* * Don't increment b->in_pos here. The same byte is * also passed to rc_read_init() which will ignore it. */ if (!lzma_props(s, ~b->in[b->in_pos])) return XZ_DATA_ERROR; s->lzma2.sequence = SEQ_LZMA_PREPARE; } /* * xz_dec_microlzma_reset() doesn't validate the compressed * size so we do it here. We have to limit the maximum size * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice * round number and much more than users of this code should * ever need. */ if (s->lzma2.compressed < RC_INIT_BYTES || s->lzma2.compressed > (3U << 30)) return XZ_DATA_ERROR; if (!rc_read_init(&s->rc, b)) return XZ_OK; s->lzma2.compressed -= RC_INIT_BYTES; s->lzma2.sequence = SEQ_LZMA_RUN; dict_reset(&s->dict, b); } /* This is to allow increasing b->out_size between calls. */ if (DEC_IS_SINGLE(s->dict.mode)) s->dict.end = b->out_size - b->out_pos; while (true) { dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos, s->lzma2.uncompressed)); if (!lzma2_lzma(s, b)) return XZ_DATA_ERROR; s->lzma2.uncompressed -= dict_flush(&s->dict, b); if (s->lzma2.uncompressed == 0) { if (s->lzma2.pedantic_microlzma) { if (s->lzma2.compressed > 0 || s->lzma.len > 0 || !rc_is_finished(&s->rc)) return XZ_DATA_ERROR; } return XZ_STREAM_END; } if (b->out_pos == b->out_size) return XZ_OK; if (b->in_pos == b->in_size && s->temp.size < s->lzma2.compressed) return XZ_OK; } } struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, uint32_t dict_size) { struct xz_dec_microlzma *s; /* Restrict dict_size to the same range as in the LZMA2 code. */ if (dict_size < 4096 || dict_size > (3U << 30)) return NULL; s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; s->s.dict.mode = mode; s->s.dict.size = dict_size; if (DEC_IS_MULTI(mode)) { s->s.dict.end = dict_size; s->s.dict.buf = vmalloc(dict_size); if (s->s.dict.buf == NULL) { kfree(s); return NULL; } } return s; } void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, uint32_t uncomp_size, int uncomp_size_is_exact) { /* * comp_size is validated in xz_dec_microlzma_run(). * uncomp_size can safely be anything. 
*/ s->s.lzma2.compressed = comp_size; s->s.lzma2.uncompressed = uncomp_size; s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact; s->s.lzma2.sequence = SEQ_PROPERTIES; s->s.temp.size = 0; } void xz_dec_microlzma_end(struct xz_dec_microlzma *s) { if (DEC_IS_MULTI(s->s.dict.mode)) vfree(s->s.dict.buf); kfree(s); } #endif
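/*
 * Editor's sketch (not part of the original file): a hypothetical caller of
 * the MicroLZMA interface above (available when XZ_DEC_MICROLZMA is enabled),
 * decoding a buffer whose compressed and uncompressed sizes are known in
 * advance. It assumes the XZ_PREALLOC mode value and the struct xz_buf layout
 * declared in <linux/xz.h>; demo_microlzma_decode() is a made-up name and
 * error handling is trimmed.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/xz.h>

static int demo_microlzma_decode(const u8 *in, u32 comp_size,
				 u8 *out, u32 uncomp_size)
{
	struct xz_dec_microlzma *s;
	struct xz_buf b = {
		.in = in,
		.in_pos = 0,
		.in_size = comp_size,
		.out = out,
		.out_pos = 0,
		.out_size = uncomp_size,
	};
	enum xz_ret ret;

	/*
	 * 4096 bytes is the minimum enforced by xz_dec_microlzma_alloc();
	 * sizing the dictionary to the uncompressed data is a conservative
	 * choice that keeps every match distance valid.
	 */
	s = xz_dec_microlzma_alloc(XZ_PREALLOC, max_t(u32, uncomp_size, 4096));
	if (s == NULL)
		return -ENOMEM;

	/* uncomp_size is exact, so request the pedantic end-of-chunk checks. */
	xz_dec_microlzma_reset(s, comp_size, uncomp_size, true);

	ret = xz_dec_microlzma_run(s, &b);
	xz_dec_microlzma_end(s);

	return ret == XZ_STREAM_END ? 0 : -EINVAL;
}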
13 100 121 121 99 96 99 8 98 119 121 120 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 /* SPDX-License-Identifier: GPL-2.0-only */ /* * sha512_base.h - core logic for SHA-512 implementations * * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> */ #ifndef _CRYPTO_SHA512_BASE_H #define _CRYPTO_SHA512_BASE_H #include <crypto/internal/hash.h> #include <crypto/sha2.h> #include <linux/crypto.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, int blocks); static inline int sha384_base_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA384_H0; sctx->state[1] = SHA384_H1; sctx->state[2] = SHA384_H2; sctx->state[3] = SHA384_H3; sctx->state[4] = SHA384_H4; sctx->state[5] = SHA384_H5; sctx->state[6] = SHA384_H6; sctx->state[7] = SHA384_H7; sctx->count[0] = sctx->count[1] = 0; return 0; } static inline int sha512_base_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA512_H0; sctx->state[1] = SHA512_H1; sctx->state[2] = SHA512_H2; sctx->state[3] = SHA512_H3; sctx->state[4] = SHA512_H4; sctx->state[5] = SHA512_H5; sctx->state[6] = SHA512_H6; sctx->state[7] = SHA512_H7; sctx->count[0] = sctx->count[1] = 0; return 0; } static inline int sha512_base_do_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha512_block_fn *block_fn) { struct sha512_state *sctx = shash_desc_ctx(desc); unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; sctx->count[0] += len; if (sctx->count[0] < len) sctx->count[1]++; if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { int blocks; if (partial) { int p = SHA512_BLOCK_SIZE - partial; memcpy(sctx->buf + partial, data, p); data += p; len -= p; block_fn(sctx, sctx->buf, 1); } blocks = len / SHA512_BLOCK_SIZE; len %= SHA512_BLOCK_SIZE; if (blocks) { block_fn(sctx, data, blocks); data += blocks * SHA512_BLOCK_SIZE; } partial = 0; } if (len) memcpy(sctx->buf + partial, data, len); return 0; } static inline int sha512_base_do_finalize(struct shash_desc *desc, sha512_block_fn *block_fn) { const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); struct sha512_state *sctx = shash_desc_ctx(desc); __be64 *bits = (__be64 *)(sctx->buf + bit_offset); unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; sctx->buf[partial++] = 0x80; if (partial > bit_offset) { memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); partial = 0; block_fn(sctx, sctx->buf, 1); } memset(sctx->buf + partial, 0x0, bit_offset - partial); bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); bits[1] = cpu_to_be64(sctx->count[0] << 3); block_fn(sctx, sctx->buf, 1); return 0; } static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) { unsigned int digest_size = crypto_shash_digestsize(desc->tfm); struct sha512_state *sctx = shash_desc_ctx(desc); __be64 *digest = (__be64 *)out; int i; for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) put_unaligned_be64(sctx->state[i], digest++); memzero_explicit(sctx, sizeof(*sctx)); return 0; } #endif /* 
_CRYPTO_SHA512_BASE_H */
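/*
 * Editor's sketch (not part of the original header): how an arch-specific
 * SHA-512 driver might wire the helpers above into a shash implementation.
 * my_sha512_transform() stands in for the driver's real compression routine
 * with the sha512_block_fn signature; all my_* names are hypothetical.
 */
#include <crypto/internal/hash.h>
#include <crypto/sha512_base.h>

static void my_sha512_transform(struct sha512_state *sst, u8 const *src,
				int blocks);	/* provided elsewhere */

static int my_sha512_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	/* Buffers partial blocks and hands full blocks to the transform. */
	return sha512_base_do_update(desc, data, len, my_sha512_transform);
}

static int my_sha512_final(struct shash_desc *desc, u8 *out)
{
	/* Pad, append the 128-bit bit count, then copy out the digest. */
	sha512_base_do_finalize(desc, my_sha512_transform);
	return sha512_base_finish(desc, out);
}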
10710 1576 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ #include <linux/stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> #include <linux/ratelimit_types.h> #include <linux/once_lite.h> struct console; extern const char linux_banner[]; extern const char linux_proc_banner[]; extern int oops_in_progress; /* If set, an oops, panic(), BUG() 
or die() is in progress */ #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { switch (buffer[1]) { case '0' ... '7': case 'c': /* KERN_CONT */ return buffer[1]; } } return 0; } static inline const char *printk_skip_level(const char *buffer) { if (printk_get_level(buffer)) return buffer + 2; return buffer; } static inline const char *printk_skip_headers(const char *buffer) { while (printk_get_level(buffer)) buffer = printk_skip_level(buffer); return buffer; } /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET int match_devname_and_update_preferred_console(const char *match, const char *name, const short idx); extern int console_printk[]; #define console_loglevel (console_printk[0]) #define default_message_loglevel (console_printk[1]) #define minimum_console_loglevel (console_printk[2]) #define default_console_loglevel (console_printk[3]) extern void console_verbose(void); /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 extern char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE]; struct ctl_table; extern int suppress_printk; struct va_format { const char *fmt; va_list *va; }; /* * FW_BUG * Add this to a message where you are sure the firmware is buggy or behaves * really stupid or out of spec. Be aware that the responsible BIOS developer * should be able to fix this issue or at least get a concrete idea of the * problem by reading your message without the need of looking at the kernel * code. * * Use it for definite and high priority BIOS bugs. * * FW_WARN * Use it for not that clear (e.g. could the kernel messed up things already?) * and medium priority BIOS bugs. * * FW_INFO * Use this one if you want to tell the user or vendor about something * suspicious, but generally harmless related to the firmware. * * Use it for information or very low priority BIOS bugs. */ #define FW_BUG "[Firmware Bug]: " #define FW_WARN "[Firmware Warn]: " #define FW_INFO "[Firmware Info]: " /* * HW_ERR * Add this to a message for hardware errors, so that user can report * it to hardware vendor instead of LKML or software vendor. */ #define HW_ERR "[Hardware Error]: " /* * DEPRECATED * Add this to a message whenever you want to warn user space about the use * of a deprecated aspect of an API so they can stop using it */ #define DEPRECATED "[Deprecated]: " /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format checking. */ #define no_printk(fmt, ...) \ ({ \ if (0) \ _printk(fmt, ##__VA_ARGS__); \ 0; \ }) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) 
{ } #endif struct dev_printk_info; #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int _printk(const char *fmt, ...); /* * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); extern void __printk_deferred_enter(void); extern void __printk_deferred_exit(void); extern void printk_force_console_enter(void); extern void printk_force_console_exit(void); /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ #define printk_deferred_enter() __printk_deferred_enter() #define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; extern int dmesg_restrict; extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } static inline __printf(1, 2) __cold int _printk(const char *s, ...) { return 0; } static inline __printf(1, 2) __cold int _printk_deferred(const char *s, ...) { return 0; } static inline void printk_deferred_enter(void) { } static inline void printk_deferred_exit(void) { } static inline void printk_force_console_enter(void) { } static inline void printk_force_console_exit(void) { } static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec) { return false; } static inline void wake_up_klogd(void) { } static inline char *log_buf_addr_get(void) { return NULL; } static inline u32 log_buf_len_get(void) { return 0; } static inline void log_buf_vmcoreinfo_setup(void) { } static inline void setup_log_buf(int early) { } static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) 
{ } static inline void dump_stack_print_info(const char *log_lvl) { } static inline void show_regs_print_info(const char *log_lvl) { } static inline void dump_stack_lvl(const char *log_lvl) { } static inline void dump_stack(void) { } static inline void printk_trigger_flush(void) { } static inline void console_try_replay_all(void) { } static inline void printk_legacy_allow_panic_sync(void) { } static inline bool nbcon_device_try_acquire(struct console *con) { return false; } static inline void nbcon_device_release(struct console *con) { } static inline void nbcon_atomic_flush_unsafe(void) { } #endif bool this_cpu_in_panic(void); #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); extern void __printk_cpu_sync_put(void); #else #define __printk_cpu_sync_try_get() true #define __printk_cpu_sync_wait() #define __printk_cpu_sync_put() #endif /* CONFIG_SMP */ /** * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk * cpu-reentrant spinning lock. * @flags: Stack-allocated storage for saving local interrupt state, * to be passed to printk_cpu_sync_put_irqrestore(). * * If the lock is owned by another CPU, spin until it becomes available. * Interrupts are restored while spinning. * * CAUTION: This function must be used carefully. It does not behave like a * typical lock. Here are important things to watch out for... * * * This function is reentrant on the same CPU. Therefore the calling * code must not assume exclusive access to data if code accessing the * data can run reentrant or within NMI context on the same CPU. * * * If there exists usage of this function from NMI context, it becomes * unsafe to perform any type of locking or spinning to wait for other * CPUs after calling this function from any context. This includes * using spinlocks or any other busy-waiting synchronization methods. */ #define printk_cpu_sync_get_irqsave(flags) \ for (;;) { \ local_irq_save(flags); \ if (__printk_cpu_sync_try_get()) \ break; \ local_irq_restore(flags); \ __printk_cpu_sync_wait(); \ } /** * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning * lock and restore interrupts. * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave(). */ #define printk_cpu_sync_put_irqrestore(flags) \ do { \ __printk_cpu_sync_put(); \ local_irq_restore(flags); \ } while (0) extern int kptr_restrict; /** * pr_fmt - used by the pr_*() macros to generate the printk format string * @fmt: format string passed from a pr_*() macro * * This macro can be used to generate a unified format string for pr_*() * macros. A common use is to prefix all pr_*() messages in a file with a common * string. For example, defining this at the top of a source file: * * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt * * would prefix all pr_info, pr_emerg... messages in the file with the module * name. */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif struct module; #ifdef CONFIG_PRINTK_INDEX struct pi_entry { const char *fmt; const char *func; const char *file; unsigned int line; /* * While printk and pr_* have the level stored in the string at compile * time, some subsystems dynamically add it at runtime through the * format string. For these dynamic cases, we allow the subsystem to * tell us the level at compile time. * * NULL indicates that the level, if any, is stored in fmt. */ const char *level; /* * The format string used by various subsystem specific printk() * wrappers to prefix the message. 
* * Note that the static prefix defined by the pr_fmt() macro is stored * directly in the message format (@fmt), not here. */ const char *subsys_fmt_prefix; } __packed; #define __printk_index_emit(_fmt, _level, _subsys_fmt_prefix) \ do { \ if (__builtin_constant_p(_fmt) && __builtin_constant_p(_level)) { \ /* * We check __builtin_constant_p multiple times here * for the same input because GCC will produce an error * if we try to assign a static variable to fmt if it * is not a constant, even with the outer if statement. */ \ static const struct pi_entry _entry \ __used = { \ .fmt = __builtin_constant_p(_fmt) ? (_fmt) : NULL, \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ .level = __builtin_constant_p(_level) ? (_level) : NULL, \ .subsys_fmt_prefix = _subsys_fmt_prefix,\ }; \ static const struct pi_entry *_entry_ptr \ __used __section(".printk_index") = &_entry; \ } \ } while (0) #else /* !CONFIG_PRINTK_INDEX */ #define __printk_index_emit(...) do {} while (0) #endif /* CONFIG_PRINTK_INDEX */ /* * Some subsystems have their own custom printk that applies a va_format to a * generic format, for example, to include a device number or other metadata * alongside the format supplied by the caller. * * In order to store these in the way they would be emitted by the printk * infrastructure, the subsystem provides us with the start, fixed string, and * any subsequent text in the format string. * * We take a variable argument list as pr_fmt/dev_fmt/etc are sometimes passed * as multiple arguments (eg: `"%s: ", "blah"`), and we must only take the * first one. * * subsys_fmt_prefix must be known at compile time, or compilation will fail * (since this is a mistake). If fmt or level is not known at compile time, no * index entry will be made (since this can legitimately happen). */ #define printk_index_subsys_emit(subsys_fmt_prefix, level, fmt, ...) \ __printk_index_emit(fmt, level, subsys_fmt_prefix) #define printk_index_wrap(_p_func, _fmt, ...) \ ({ \ __printk_index_emit(_fmt, NULL, NULL); \ _p_func(_fmt, ##__VA_ARGS__); \ }) /** * printk - print a kernel message * @fmt: format string * * This is printk(). It can be called from any context. We want it to work. * * If printk indexing is enabled, _printk() is called from printk_index_wrap. * Otherwise, printk is simply #defined to _printk. * * We try to grab the console_lock. If we succeed, it's easy - we log the * output and call the console drivers. If we fail to get the semaphore, we * place the output into the log buffer and return. The current holder of * the console_sem will notice the new output in console_unlock(); and will * send it to the consoles before releasing the lock. * * One effect of this deferred printing is that code which calls printk() and * then changes console_loglevel may break. This is because console_loglevel * is inspected when the actual printing occurs. * * See also: * printf(3) * * See the vsnprintf() documentation for format string extensions over C99. */ #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) #define printk_deferred(fmt, ...) \ printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__) /** * pr_emerg - Print an emergency-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_emerg(fmt, ...) 
\ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) /** * pr_alert - Print an alert-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_crit - Print a critical-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_err - Print an error-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) /** * pr_warn - Print a warning-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() * to generate the format string. */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) /** * pr_notice - Print a notice-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) /** * pr_info - Print an info-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /** * pr_cont - Continues a previous log message in the same line. * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CONT loglevel. It should only be * used when continuing a log message with no newline ('\n') enclosed. Otherwise * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /** * pr_devel - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is * defined. Otherwise it does nothing. * * It uses pr_fmt() to generate the format string. */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #include <linux/dynamic_debug.h> /** * pr_debug - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. * * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses * pr_fmt() internally). */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) 
\ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case */ #ifdef CONFIG_PRINTK #define printk_ratelimited(fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ printk(fmt, ##__VA_ARGS__); \ }) #else #define printk_ratelimited(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_ratelimited(fmt, ...) \ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_ratelimited(fmt, ...) \ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_ratelimited(fmt, ...) \ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_ratelimited(fmt, ...) \ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ #if defined(DEBUG) #define pr_devel_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define pr_debug_ratelimited(fmt, ...) 
\ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif extern const struct file_operations kmsg_fops; enum { DUMP_PREFIX_NONE, DUMP_PREFIX_ADDRESS, DUMP_PREFIX_OFFSET }; extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii); #ifdef CONFIG_PRINTK extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #else static inline void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len) { } #endif #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #else static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } #endif /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @buf: data blob to dump * @len: number of bytes in the @buf * * Calls print_hex_dump(), with log level of KERN_DEBUG, * rowsize of 16, groupsize of 1, and ASCII output included. */ #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) #endif
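/*
 * Editor's sketch (not part of the original header): typical use of the
 * pr_*() family in a driver source file. demo_report() and the "demo"
 * prefix are made up; KBUILD_MODNAME is supplied by kbuild. Note that
 * pr_fmt() must be defined before the first pr_*() expansion, conventionally
 * before any #include, so the default definition above is not used instead.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/printk.h>

static void demo_report(const void *buf, size_t len, int err)
{
	/* Prefixed with "<module>: " by the pr_fmt() definition above. */
	pr_info("processed %zu bytes\n", len);

	/* Shares a local ratelimit_state with other calls at this site only. */
	if (err)
		pr_err_ratelimited("transfer failed: %d\n", err);

	/* One-shot warning, analogous to WARN_ONCE(). */
	pr_warn_once("running with debug checks enabled\n");

	/* KERN_DEBUG hex dump: 16 bytes per row, 1-byte groups, ASCII column. */
	print_hex_dump_bytes("demo buf: ", DUMP_PREFIX_OFFSET, buf, len);
}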
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_EC_H #define _BCACHEFS_EC_H #include "ec_types.h" #include "buckets_types.h" #include "extents_types.h" int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ .min_val_size = 8, \ }) static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) { return DIV_ROUND_UP(le16_to_cpu(s->sectors), 1 << s->csum_granularity_bits); } static inline unsigned stripe_csum_offset(const struct bch_stripe *s, unsigned dev, unsigned csum_idx) { EBUG_ON(s->csum_type >= BCH_CSUM_NR); unsigned csum_bytes = bch_crc_bytes[s->csum_type]; return sizeof(struct bch_stripe) + sizeof(struct bch_extent_ptr) * s->nr_blocks + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; } static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, unsigned idx) { return stripe_csum_offset(s, s->nr_blocks, 0) + sizeof(u16) * idx; } static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, unsigned idx) { return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); } static inline void stripe_blockcount_set(struct bch_stripe *s, unsigned idx, unsigned v) { __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); *p = cpu_to_le16(v); } static inline unsigned stripe_val_u64s(const struct bch_stripe *s) { return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), sizeof(u64)); } static inline void *stripe_csum(struct bch_stripe *s, unsigned block, unsigned csum_idx) { EBUG_ON(block >= s->nr_blocks); EBUG_ON(csum_idx >= stripe_csums_per_device(s)); return (void *) s + stripe_csum_offset(s, block, csum_idx); } static inline struct bch_csum stripe_csum_get(struct bch_stripe *s, unsigned block, unsigned csum_idx) { struct bch_csum csum = { 0 }; memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]); return csum; } static inline void stripe_csum_set(struct bch_stripe *s, unsigned block, unsigned csum_idx, struct bch_csum csum) { memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]); } static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr, const struct 
bch_extent_ptr *data_ptr, unsigned sectors) { return (data_ptr->dev == stripe_ptr->dev || data_ptr->dev == BCH_SB_MEMBER_INVALID || stripe_ptr->dev == BCH_SB_MEMBER_INVALID) && data_ptr->gen == stripe_ptr->gen && data_ptr->offset >= stripe_ptr->offset && data_ptr->offset < stripe_ptr->offset + sectors; } static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, struct extent_ptr_decoded p) { unsigned nr_data = s->nr_blocks - s->nr_redundant; BUG_ON(!p.has_ec); if (p.ec.block >= nr_data) return false; return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr, le16_to_cpu(s->sectors)); } static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m, struct extent_ptr_decoded p) { unsigned nr_data = m->nr_blocks - m->nr_redundant; BUG_ON(!p.has_ec); if (p.ec.block >= nr_data) return false; return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr, m->sectors); } struct bch_read_bio; struct ec_stripe_buf { /* might not be buffering the entire stripe: */ unsigned offset; unsigned size; unsigned long valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; void *data[BCH_BKEY_PTRS_MAX]; __BKEY_PADDED(key, 255); }; struct ec_stripe_head; enum ec_stripe_ref { STRIPE_REF_io, STRIPE_REF_stripe, STRIPE_REF_NR }; struct ec_stripe_new { struct bch_fs *c; struct ec_stripe_head *h; struct mutex lock; struct list_head list; struct hlist_node hash; u64 idx; struct closure iodone; atomic_t ref[STRIPE_REF_NR]; int err; u8 nr_data; u8 nr_parity; bool allocated; bool pending; bool have_existing_stripe; unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX]; struct disk_reservation res; struct ec_stripe_buf new_stripe; struct ec_stripe_buf existing_stripe; }; struct ec_stripe_head { struct list_head list; struct mutex lock; unsigned disk_label; unsigned algo; unsigned redundancy; enum bch_watermark watermark; bool insufficient_devs; unsigned long rw_devs_change_count; u64 nr_created; struct bch_devs_mask devs; unsigned nr_active_devs; unsigned blocksize; struct dev_stripe_state block_stripe; struct dev_stripe_state parity_stripe; struct ec_stripe_new *s; }; int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *, struct bkey_s_c); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *, unsigned, unsigned, unsigned, enum bch_watermark, struct closure *); void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); void bch2_do_stripe_deletes(struct bch_fs *); void bch2_ec_do_stripe_creates(struct bch_fs *); void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *); static inline void ec_stripe_new_get(struct ec_stripe_new *s, enum ec_stripe_ref ref) { atomic_inc(&s->ref[ref]); } static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, enum ec_stripe_ref ref) { BUG_ON(atomic_read(&s->ref[ref]) <= 0); if (atomic_dec_and_test(&s->ref[ref])) switch (ref) { case STRIPE_REF_stripe: bch2_ec_stripe_new_free(c, s); break; case STRIPE_REF_io: bch2_ec_do_stripe_creates(c); break; default: BUG(); } } int 
bch2_dev_remove_stripes(struct bch_fs *, unsigned); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_fs_ec_stop(struct bch_fs *); void bch2_fs_ec_flush(struct bch_fs *); int bch2_stripes_read(struct bch_fs *); void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *); void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_init_early(struct bch_fs *); int bch2_fs_ec_init(struct bch_fs *); #endif /* _BCACHEFS_EC_H */
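/*
 * Editor's sketch (not part of the original header): a hypothetical helper
 * that walks a stripe's trailing per-block sector counts using the layout
 * accessors above, purely to illustrate how the bch_stripe value is laid out
 * (fixed header, nr_blocks extent pointers, per-device checksums, then the
 * __le16 block counts read by stripe_blockcount_get()).
 */
static inline unsigned demo_stripe_blockcount_sum(const struct bch_stripe *s)
{
	unsigned i, total = 0;

	for (i = 0; i < s->nr_blocks; i++)
		total += stripe_blockcount_get(s, i);

	return total;
}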
18 13 18 30 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 /* * linux/fs/nls/nls_cp737.c * * Charset cp737 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, /* 0x90*/ 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, /* 0xa0*/ 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x03c3, 0x03c2, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 
0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03c9, 0x03ac, 0x03ad, 0x03ae, 0x03ca, 0x03af, 0x03cc, 0x03cd, 0x03cb, 0x03ce, 0x0386, 0x0388, 0x0389, 0x038a, 0x038c, 0x038e, /* 0xf0*/ 0x038f, 0x00b1, 0x2265, 0x2264, 0x03aa, 0x03ab, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0x00, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, 0x00, /* 0x80-0x87 */ 0xeb, 0xec, 0xed, 
0x00, 0xee, 0x00, 0xef, 0xf0, /* 0x88-0x8f */ 0x00, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, /* 0x90-0x97 */ 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, /* 0x98-0x9f */ 0x8f, 0x90, 0x00, 0x91, 0x92, 0x93, 0x94, 0x95, /* 0xa0-0xa7 */ 0x96, 0x97, 0xf4, 0xf5, 0xe1, 0xe2, 0xe3, 0xe5, /* 0xa8-0xaf */ 0x00, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, /* 0xb0-0xb7 */ 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, /* 0xb8-0xbf */ 0xa7, 0xa8, 0xaa, 0xa9, 0xab, 0xac, 0xad, 0xae, /* 0xc0-0xc7 */ 0xaf, 0xe0, 0xe4, 0xe8, 0xe6, 0xe7, 0xe9, 0x00, /* 0xc8-0xcf */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, NULL, NULL, page03, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, NULL, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x80-0x87 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x88-0x8f */ 0xa8, 0xa9, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xe0, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xe1, 0xe2, 0xe3, 0xe5, 0xe6, 0xe7, /* 0xe8-0xef */ 0xe9, 0xf1, 0xf2, 0xf3, 0xe4, 0xe8, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 
0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x98-0x9f */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0xa0-0xa7 */ 0x90, 0x91, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x97, 0xea, 0xeb, 0xec, 0xf4, 0xed, 0xee, 0xef, /* 0xe0-0xe7 */ 0xf5, 0xf0, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp737", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp737(void) { return register_nls(&table); } static void __exit exit_nls_cp737(void) { unregister_nls(&table); } module_init(init_nls_cp737) module_exit(exit_nls_cp737) MODULE_DESCRIPTION("NLS Codepage 737 (Greek)"); MODULE_LICENSE("Dual BSD/GPL");
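The uni2char()/char2uni() pair in nls_cp737.c above is a classic two-level lookup: the high byte of the Unicode code point selects one of the page tables (page00, page03, ...), the low byte indexes into it, and a zero entry means "no mapping". The following is a minimal userspace sketch of the same scheme, not the kernel NLS API; the page tables contain only two entries copied from the tables shown (U+0391 -> 0x80, U+00B1 -> 0xf1), everything else is left unmapped.

/*
 * Minimal model of the two-level Unicode -> single-byte lookup used by
 * the cp737 tables above.  Only two mappings are filled in; the rest of
 * the pages are intentionally empty.
 */
#include <stdio.h>

typedef unsigned short wchar16;

static const unsigned char page00[256] = {
	[0xb1] = 0xf1,		/* U+00B1 PLUS-MINUS SIGN -> 0xf1 */
};
static const unsigned char page03[256] = {
	[0x91] = 0x80,		/* U+0391 GREEK CAPITAL ALPHA -> 0x80 */
};
static const unsigned char *const page_uni2charset[256] = {
	[0x00] = page00,
	[0x03] = page03,
};

/* Returns the single-byte encoding, or -1 if there is no mapping. */
static int uni2char(wchar16 uni)
{
	const unsigned char *page = page_uni2charset[(uni >> 8) & 0xff];
	unsigned char c;

	if (!page)
		return -1;
	c = page[uni & 0xff];
	return c ? c : -1;
}

int main(void)
{
	printf("U+0391 -> 0x%x\n", uni2char(0x0391));	/* 0x80 */
	printf("U+00B1 -> 0x%x\n", uni2char(0x00b1));	/* 0xf1 */
	printf("U+20AC -> %d\n", uni2char(0x20ac));	/* -1: unmapped */
	return 0;
}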
15 13 2 2 1 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "eytzinger.h" #include "journal.h" #include "journal_seq_blacklist.h" #include "super-io.h" /* * journal_seq_blacklist machinery: * * To guarantee order of btree updates after a crash, we need to detect when a * btree node entry (bset) is newer than the newest journal entry that was * successfully written, and ignore it - effectively ignoring any btree updates * that didn't make it into the journal. * * If we didn't do this, we might have two btree nodes, a and b, both with * updates that weren't written to the journal yet: if b was updated after a, * but b was flushed and not a - oops; on recovery we'll find that the updates * to b happened, but not the updates to a that happened before it. * * Ignoring bsets that are newer than the newest journal entry is always safe, * because everything they contain will also have been journalled - and must * still be present in the journal on disk until a journal entry has been * written _after_ that bset was written. * * To accomplish this, bsets record the newest journal sequence number they * contain updates for; then, on startup, the btree code queries the journal * code to ask "Is this sequence number newer than the newest journal entry? If * so, ignore it." * * When this happens, we must blacklist that journal sequence number: the * journal must not write any entries with that sequence number, and it must * record that it was blacklisted so that a) on recovery we don't think we have * missing journal entries and b) so that the btree code continues to ignore * that bset, until that btree node is rewritten. 
*/ static unsigned sb_blacklist_u64s(unsigned nr) { struct bch_sb_field_journal_seq_blacklist *bl; return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); } int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) { struct bch_sb_field_journal_seq_blacklist *bl; unsigned i = 0, nr; int ret = 0; mutex_lock(&c->sb_lock); bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); nr = blacklist_nr_entries(bl); while (i < nr) { struct journal_seq_blacklist_entry *e = bl->start + i; if (end < le64_to_cpu(e->start)) break; if (start > le64_to_cpu(e->end)) { i++; continue; } /* * Entry is contiguous or overlapping with new entry: merge it * with new entry, and delete: */ start = min(start, le64_to_cpu(e->start)); end = max(end, le64_to_cpu(e->end)); array_remove_item(bl->start, nr, i); } bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, sb_blacklist_u64s(nr + 1)); if (!bl) { ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist; goto out; } array_insert_item(bl->start, nr, i, ((struct journal_seq_blacklist_entry) { .start = cpu_to_le64(start), .end = cpu_to_le64(end), })); c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); ret = bch2_write_super(c); out: mutex_unlock(&c->sb_lock); return ret ?: bch2_blacklist_table_initialize(c); } static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) { const struct journal_seq_blacklist_table_entry *l = _l; const struct journal_seq_blacklist_table_entry *r = _r; return cmp_int(l->start, r->start); } bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, bool dirty) { struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; struct journal_seq_blacklist_table_entry search = { .start = seq }; int idx; if (!t) return false; idx = eytzinger0_find_le(t->entries, t->nr, sizeof(t->entries[0]), journal_seq_blacklist_table_cmp, &search); if (idx < 0) return false; BUG_ON(t->entries[idx].start > seq); if (seq >= t->entries[idx].end) return false; if (dirty) t->entries[idx].dirty = true; return true; } int bch2_blacklist_table_initialize(struct bch_fs *c) { struct bch_sb_field_journal_seq_blacklist *bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); struct journal_seq_blacklist_table *t; unsigned i, nr = blacklist_nr_entries(bl); if (!bl) return 0; t = kzalloc(struct_size(t, entries, nr), GFP_KERNEL); if (!t) return -BCH_ERR_ENOMEM_blacklist_table_init; t->nr = nr; for (i = 0; i < nr; i++) { t->entries[i].start = le64_to_cpu(bl->start[i].start); t->entries[i].end = le64_to_cpu(bl->start[i].end); } eytzinger0_sort(t->entries, t->nr, sizeof(t->entries[0]), journal_seq_blacklist_table_cmp, NULL); kfree(c->journal_seq_blacklist_table); c->journal_seq_blacklist_table = t; return 0; } static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { struct bch_sb_field_journal_seq_blacklist *bl = field_to_type(f, journal_seq_blacklist); unsigned i, nr = blacklist_nr_entries(bl); for (i = 0; i < nr; i++) { struct journal_seq_blacklist_entry *e = bl->start + i; if (le64_to_cpu(e->start) >= le64_to_cpu(e->end)) { prt_printf(err, "entry %u start >= end (%llu >= %llu)", i, le64_to_cpu(e->start), le64_to_cpu(e->end)); return -BCH_ERR_invalid_sb_journal_seq_blacklist; } if (i + 1 < nr && le64_to_cpu(e[0].end) > le64_to_cpu(e[1].start)) { prt_printf(err, "entry %u out of order with next entry (%llu > %llu)", i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start)); return 
-BCH_ERR_invalid_sb_journal_seq_blacklist; } } return 0; } static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) { struct bch_sb_field_journal_seq_blacklist *bl = field_to_type(f, journal_seq_blacklist); struct journal_seq_blacklist_entry *i; unsigned nr = blacklist_nr_entries(bl); for (i = bl->start; i < bl->start + nr; i++) { if (i != bl->start) prt_printf(out, " "); prt_printf(out, "%llu-%llu", le64_to_cpu(i->start), le64_to_cpu(i->end)); } prt_newline(out); } const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { .validate = bch2_sb_journal_seq_blacklist_validate, .to_text = bch2_sb_journal_seq_blacklist_to_text }; bool bch2_blacklist_entries_gc(struct bch_fs *c) { struct journal_seq_blacklist_entry *src, *dst; struct bch_sb_field_journal_seq_blacklist *bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); if (!bl) return false; unsigned nr = blacklist_nr_entries(bl); dst = bl->start; struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; BUG_ON(nr != t->nr); unsigned i; for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr); src < bl->start + nr; src++, i = eytzinger0_next(i, nr)) { BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); if (t->entries[i].dirty || t->entries[i].end >= c->journal.oldest_seq_found_ondisk) *dst++ = *src; } unsigned new_nr = dst - bl->start; if (new_nr == nr) return false; bch_verbose(c, "nr blacklist entries was %u, now %u", nr, new_nr); bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, new_nr ? sb_blacklist_u64s(new_nr) : 0); BUG_ON(new_nr && !bl); return true; }
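The comment at the top of journal_seq_blacklist.c explains the idea: bsets newer than the newest on-disk journal entry are ignored, and their sequence numbers are blacklisted so recovery does not mistake them for missing journal entries. bch2_journal_seq_is_blacklisted() then answers "does seq fall inside any blacklisted [start, end) range" using a search over an eytzinger-ordered table. Below is a simplified userspace model of that query using a plain sorted array and an ordinary binary search; it is a sketch of the semantics, not the kernel data structure.

/*
 * Simplified model of the blacklist lookup: entries are non-overlapping
 * [start, end) ranges sorted by start; a sequence number is blacklisted
 * if the last entry whose start is <= seq also ends after seq.  The
 * kernel does the same "find last start <= seq" search, but over an
 * eytzinger-ordered array for cache friendliness.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct bl_entry { uint64_t start, end; };

static bool seq_is_blacklisted(const struct bl_entry *tbl, size_t nr, uint64_t seq)
{
	size_t lo = 0, hi = nr;		/* find last entry with start <= seq */

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (tbl[mid].start <= seq)
			lo = mid + 1;
		else
			hi = mid;
	}
	if (!lo)
		return false;
	return seq < tbl[lo - 1].end;	/* end is exclusive, as above */
}

int main(void)
{
	static const struct bl_entry tbl[] = {
		{ .start = 10, .end = 20 },
		{ .start = 35, .end = 40 },
	};

	printf("%d %d %d\n",
	       seq_is_blacklisted(tbl, 2, 15),	/* 1 */
	       seq_is_blacklisted(tbl, 2, 20),	/* 0: end is exclusive */
	       seq_is_blacklisted(tbl, 2, 36));	/* 1 */
	return 0;
}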
4 4 1 1 1 8 8 1 2 1 6 6 1 5 7 7 1 8 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 // SPDX-License-Identifier: GPL-2.0-only /* * Binary Increase Congestion control for TCP * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of BICTCP in * Lison-Xu, Kahaled Harfoush, and Injong Rhee. * "Binary Increase Congestion Control for Fast, Long Distance * Networks" in InfoComm 2004 * Available from: * http://netsrv.csc.ncsu.edu/export/bitcp.pdf * * Unless BIC is enabled and congestion window is large * this behaves the same as the original Reno. */ #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ #define BICTCP_B 4 /* * In binary search, * go to point (max+min)/N */ static int fast_convergence = 1; static int max_increment = 16; static int low_window = 14; static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh; static int smooth_part = 20; module_param(fast_convergence, int, 0644); MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); module_param(max_increment, int, 0644); MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); module_param(low_window, int, 0644); MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "beta for multiplicative increase"); module_param(initial_ssthresh, int, 0644); MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(smooth_part, int, 0644); MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wmax-B to Wmax"); /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 epoch_start; /* beginning of an epoch */ #define ACK_RATIO_SHIFT 4 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ }; static inline void bictcp_reset(struct bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; ca->epoch_start = 0; ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } static void bictcp_init(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); if (initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* * Compute congestion window to use. 
*/ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { if (ca->last_cwnd == cwnd && (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; ca->last_cwnd = cwnd; ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) /* record the beginning of an epoch */ ca->epoch_start = tcp_jiffies32; /* start off normal */ if (cwnd <= low_window) { ca->cnt = cwnd; return; } /* binary increase */ if (cwnd < ca->last_max_cwnd) { __u32 dist = (ca->last_max_cwnd - cwnd) / BICTCP_B; if (dist > max_increment) /* linear increase */ ca->cnt = cwnd / max_increment; else if (dist <= 1U) /* binary search increase */ ca->cnt = (cwnd * smooth_part) / BICTCP_B; else /* binary search increase */ ca->cnt = cwnd / dist; } else { /* slow start AMD linear increase */ if (cwnd < ca->last_max_cwnd + BICTCP_B) /* slow start */ ca->cnt = (cwnd * smooth_part) / BICTCP_B; else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1)) /* slow start */ ca->cnt = (cwnd * (BICTCP_B-1)) / (cwnd - ca->last_max_cwnd); else /* linear increase */ ca->cnt = cwnd / max_increment; } /* if in slow start or link utilization is very low */ if (ca->last_max_cwnd == 0) { if (ca->cnt > 20) /* increase cwnd 5% per RTT */ ca->cnt = 20; } ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; if (ca->cnt == 0) /* cannot be zero */ ca->cnt = 1; } static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } bictcp_update(ca, tcp_snd_cwnd(tp)); tcp_cong_avoid_ai(tp, ca->cnt, acked); } /* * behave like Reno until low_window is reached, * then increase congestion window slowly */ static u32 bictcp_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else ca->last_max_cwnd = tcp_snd_cwnd(tp); if (tcp_snd_cwnd(tp) <= low_window) return max(tcp_snd_cwnd(tp) >> 1U, 2U); else return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); ca->delayed_ack += sample->pkts_acked - (ca->delayed_ack >> ACK_RATIO_SHIFT); } } static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, .name = "bic", }; static int __init bictcp_register(void) { BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } static void __exit bictcp_unregister(void) { tcp_unregister_congestion_control(&bictcp); } module_init(bictcp_register); module_exit(bictcp_unregister); MODULE_AUTHOR("Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("BIC TCP");
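The heart of tcp_bic.c is bictcp_update(), which turns the distance to the previous maximum window into ca->cnt, the number of ACKs required per one-segment cwnd increase: far below Wmax the window grows linearly, close to Wmax it slows down (the smoothed/binary-search region), and above Wmax it probes slowly before going linear again. The sketch below is a hedged userspace model of that cwnd -> cnt mapping using the module parameter defaults shown above; the delayed-ACK scaling and the jiffies rate limit are deliberately omitted.

/*
 * Userspace model of the cwnd -> cnt mapping in bictcp_update(), with
 * the defaults low_window=14, max_increment=16, smooth_part=20 and
 * BICTCP_B=4.  Not the kernel code path: no socket state, no
 * delayed-ACK ratio, no time-based rate limiting.
 */
#include <stdint.h>
#include <stdio.h>

#define BICTCP_B	4
static const uint32_t low_window = 14;
static const uint32_t max_increment = 16;
static const uint32_t smooth_part = 20;

static uint32_t bic_cnt(uint32_t cwnd, uint32_t last_max_cwnd)
{
	uint32_t cnt;

	if (cwnd <= low_window)
		return cwnd;			/* Reno-like: +1 MSS per RTT */

	if (cwnd < last_max_cwnd) {
		uint32_t dist = (last_max_cwnd - cwnd) / BICTCP_B;

		if (dist > max_increment)
			cnt = cwnd / max_increment;		/* linear increase */
		else if (dist <= 1U)
			cnt = (cwnd * smooth_part) / BICTCP_B;	/* smoothed, near Wmax */
		else
			cnt = cwnd / dist;			/* binary search increase */
	} else {
		if (cwnd < last_max_cwnd + BICTCP_B)
			cnt = (cwnd * smooth_part) / BICTCP_B;	/* slow probing */
		else if (cwnd < last_max_cwnd + max_increment * (BICTCP_B - 1))
			cnt = (cwnd * (BICTCP_B - 1)) / (cwnd - last_max_cwnd);
		else
			cnt = cwnd / max_increment;		/* linear increase */
	}

	if (last_max_cwnd == 0 && cnt > 20)
		cnt = 20;			/* no Wmax yet: ~5% growth per RTT */

	return cnt ? cnt : 1;
}

int main(void)
{
	for (uint32_t cwnd = 16; cwnd <= 256; cwnd *= 2)
		printf("cwnd %3u, last_max 200 -> cnt %u\n", cwnd, bic_cnt(cwnd, 200));
	return 0;
}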
4 4 9 2 6 3 4 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* * Route Plug-In * Copyright (c) 2000 by Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" static void zero_areas(struct snd_pcm_plugin_channel *dvp, int ndsts, snd_pcm_uframes_t frames, snd_pcm_format_t format) { int dst = 0; for (; dst < ndsts; ++dst) { if (dvp->wanted) snd_pcm_area_silence(&dvp->area, 0, frames, format); dvp->enabled = 0; dvp++; } } static inline void copy_area(const struct snd_pcm_plugin_channel *src_channel, struct snd_pcm_plugin_channel *dst_channel, snd_pcm_uframes_t frames, snd_pcm_format_t format) { dst_channel->enabled = 1; snd_pcm_area_copy(&src_channel->area, 0, &dst_channel->area, 0, frames, format); } static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { int nsrcs, ndsts, dst; struct snd_pcm_plugin_channel *dvp; snd_pcm_format_t format; if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; format = plugin->dst_format.format; dvp = dst_channels; if (nsrcs <= 1) { /* expand to all channels */ for (dst = 0; dst < ndsts; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; } return frames; } for (dst = 0; dst < ndsts && dst < nsrcs; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; src_channels++; } if (dst < ndsts) zero_areas(dvp, ndsts - dst, frames, format); return frames; } int snd_pcm_plugin_build_route(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { struct snd_pcm_plugin *plugin; int err; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->format != dst_format->format)) return -ENXIO; err = snd_pcm_plugin_build(plug, "route conversion", src_format, dst_format, 0, &plugin); if (err < 0) return err; plugin->transfer = route_transfer; *r_plugin = plugin; return 0; }
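route_transfer() in the ALSA route plugin above implements a simple channel-routing policy: a single source channel is duplicated to every destination, otherwise source i is copied to destination i and any surplus destinations are silenced. The sketch below models that policy over plain per-channel int16 buffers; it is an illustration of the routing rule only, not the snd_pcm_plugin area/format machinery.

/*
 * Userspace model of the routing policy in route_transfer(): fan one
 * source out to all destinations, otherwise copy pairwise and silence
 * the leftover destination channels (the zero_areas() case).
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void route(const int16_t *const *src, int nsrcs,
		  int16_t *const *dst, int ndsts, size_t frames)
{
	int d;

	if (nsrcs <= 1) {				/* expand to all channels */
		for (d = 0; d < ndsts; d++)
			memcpy(dst[d], src[0], frames * sizeof(int16_t));
		return;
	}
	for (d = 0; d < ndsts && d < nsrcs; d++)	/* pairwise copy */
		memcpy(dst[d], src[d], frames * sizeof(int16_t));
	for (; d < ndsts; d++)				/* silence the rest */
		memset(dst[d], 0, frames * sizeof(int16_t));
}

int main(void)
{
	int16_t l[4] = { 1, 2, 3, 4 }, r[4] = { 5, 6, 7, 8 };
	int16_t o0[4], o1[4], o2[4];
	const int16_t *src[] = { l, r };
	int16_t *dst[] = { o0, o1, o2 };

	route(src, 2, dst, 3, 4);
	printf("dst0[0]=%d dst1[0]=%d dst2[0]=%d (silenced)\n", o0[0], o1[0], o2[0]);
	return 0;
}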
2 12 16 17 8 17 3 1 2 3 33 32 11 3 1 16 16 3 29 39 56 53 71 49 5 14 2 2 36 29 2 28 24 9 15 16 43 30 508 1 353 193 110 282 67 184 29 33 17 15 13 4 3 11 11 11 17 17 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 // SPDX-License-Identifier: GPL-2.0-or-later /* scm.c - Socket level control messages processing. * * Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Alignment and value checking mods by Craig Metz */ #include <linux/module.h> #include <linux/signal.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/errqueue.h> #include <linux/io_uring.h> #include <linux/uaccess.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/compat.h> #include <net/scm.h> #include <net/cls_cgroup.h> #include <net/af_unix.h> /* * Only allow a user to send credentials, that they could set with * setu(g)id. 
*/ static __inline__ int scm_check_creds(struct ucred *creds) { const struct cred *cred = current_cred(); kuid_t uid = make_kuid(cred->user_ns, creds->uid); kgid_t gid = make_kgid(cred->user_ns, creds->gid); if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) { return 0; } return -EPERM; } static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) { int *fdp = (int*)CMSG_DATA(cmsg); struct scm_fp_list *fpl = *fplp; struct file **fpp; int i, num; num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int); if (num <= 0) return 0; if (num > SCM_MAX_FD) return -EINVAL; if (!fpl) { fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT); if (!fpl) return -ENOMEM; *fplp = fpl; fpl->count = 0; fpl->count_unix = 0; fpl->max = SCM_MAX_FD; fpl->user = NULL; #if IS_ENABLED(CONFIG_UNIX) fpl->inflight = false; fpl->dead = false; fpl->edges = NULL; INIT_LIST_HEAD(&fpl->vertices); #endif } fpp = &fpl->fp[fpl->count]; if (fpl->count + num > fpl->max) return -EINVAL; /* * Verify the descriptors and increment the usage count. */ for (i=0; i< num; i++) { int fd = fdp[i]; struct file *file; if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; /* don't allow io_uring files */ if (io_is_uring_fops(file)) { fput(file); return -EINVAL; } if (unix_get_socket(file)) fpl->count_unix++; *fpp++ = file; fpl->count++; } if (!fpl->user) fpl->user = get_uid(current_user()); return num; } void __scm_destroy(struct scm_cookie *scm) { struct scm_fp_list *fpl = scm->fp; int i; if (fpl) { scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); free_uid(fpl->user); kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { const struct proto_ops *ops = READ_ONCE(sock->ops); struct cmsghdr *cmsg; int err; for_each_cmsghdr(cmsg, msg) { err = -EINVAL; /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ /* The first check was omitted in <= 2.2.5. The reasoning was that parser checks cmsg_len in any case, so that additional check would be work duplication. But if cmsg_level is not SOL_SOCKET, we do not check for too short ancillary data object at all! Oops. OK, let's add it... 
*/ if (!CMSG_OK(msg, cmsg)) goto error; if (cmsg->cmsg_level != SOL_SOCKET) continue; switch (cmsg->cmsg_type) { case SCM_RIGHTS: if (!ops || ops->family != PF_UNIX) goto error; err=scm_fp_copy(cmsg, &p->fp); if (err<0) goto error; break; case SCM_CREDENTIALS: { struct ucred creds; kuid_t uid; kgid_t gid; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred)); err = scm_check_creds(&creds); if (err) goto error; p->creds.pid = creds.pid; if (!p->pid || pid_vnr(p->pid) != creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(creds.pid); if (!pid) goto error; put_pid(p->pid); p->pid = pid; } err = -EINVAL; uid = make_kuid(current_user_ns(), creds.uid); gid = make_kgid(current_user_ns(), creds.gid); if (!uid_valid(uid) || !gid_valid(gid)) goto error; p->creds.uid = uid; p->creds.gid = gid; break; } default: goto error; } } if (p->fp && !p->fp->count) { kfree(p->fp); p->fp = NULL; } return 0; error: scm_destroy(p); return err; } EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { int cmlen = CMSG_LEN(len); if (msg->msg_flags & MSG_CMSG_COMPAT) return put_cmsg_compat(msg, level, type, len, data); if (!msg->msg_control || msg->msg_controllen < sizeof(struct cmsghdr)) { msg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (msg->msg_controllen < cmlen) { msg->msg_flags |= MSG_CTRUNC; cmlen = msg->msg_controllen; } if (msg->msg_control_is_user) { struct cmsghdr __user *cm = msg->msg_control_user; check_object_size(data, cmlen - sizeof(*cm), true); if (!user_write_access_begin(cm, cmlen)) goto efault; unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); unsafe_put_user(level, &cm->cmsg_level, efault_end); unsafe_put_user(type, &cm->cmsg_type, efault_end); unsafe_copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm), efault_end); user_write_access_end(); } else { struct cmsghdr *cm = msg->msg_control; cm->cmsg_level = level; cm->cmsg_type = type; cm->cmsg_len = cmlen; memcpy(CMSG_DATA(cm), data, cmlen - sizeof(*cm)); } cmlen = min(CMSG_SPACE(len), msg->msg_controllen); if (msg->msg_control_is_user) msg->msg_control_user += cmlen; else msg->msg_control += cmlen; msg->msg_controllen -= cmlen; return 0; efault_end: user_write_access_end(); efault: return -EFAULT; } EXPORT_SYMBOL(put_cmsg); void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping64 tss; int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); } EXPORT_SYMBOL(put_cmsg_scm_timestamping64); void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping tss; int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); } EXPORT_SYMBOL(put_cmsg_scm_timestamping); static int scm_max_fds(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct cmsghdr)) return 0; return (msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int); } void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm = (__force struct cmsghdr __user *)msg->msg_control_user; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? 
O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); int __user *cmsg_data = CMSG_USER_DATA(cm); int err = 0, i; /* no use for FD passing from kernel space callers */ if (WARN_ON_ONCE(!msg->msg_control_is_user)) return; if (msg->msg_flags & MSG_CMSG_COMPAT) { scm_detach_fds_compat(msg, scm); return; } for (i = 0; i < fdmax; i++) { err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } if (i > 0) { int cmlen = CMSG_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control_user += cmlen; msg->msg_controllen -= cmlen; } } if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their usage counts * incremented, so we just free the list. */ __scm_destroy(scm); } EXPORT_SYMBOL(scm_detach_fds); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { struct scm_fp_list *new_fpl; int i; if (!fpl) return NULL; new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), GFP_KERNEL_ACCOUNT); if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; new_fpl->user = get_uid(fpl->user); #if IS_ENABLED(CONFIG_UNIX) new_fpl->inflight = false; new_fpl->edges = NULL; INIT_LIST_HEAD(&new_fpl->vertices); #endif } return new_fpl; } EXPORT_SYMBOL(scm_fp_dup);
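scm_fp_copy() and scm_detach_fds() above are the kernel side of SCM_RIGHTS file-descriptor passing; from userspace the same machinery is driven with sendmsg()/recvmsg() over an AF_UNIX socket and the CMSG_* macros. The following is a minimal, lightly error-checked example passing one descriptor across a socketpair; it illustrates the standard POSIX/Linux API rather than anything specific to this file.

/*
 * Minimal userspace counterpart of the SCM_RIGHTS handling above: send
 * one file descriptor across an AF_UNIX socketpair.  Error handling is
 * trimmed for brevity.
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void send_fd(int sock, int fd)
{
	char dummy = 'x';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &fd, sizeof(int));
	sendmsg(sock, &msg, 0);
}

static int recv_fd(int sock)
{
	char dummy;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0)
		return -1;
	for (struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS)
			memcpy(&fd, CMSG_DATA(cm), sizeof(int));
	return fd;
}

int main(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	send_fd(sv[0], open("/dev/null", O_RDONLY));
	printf("received fd %d\n", recv_fd(sv[1]));
	return 0;
}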
11 11 7 7 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ #include "rxe.h" #define RXE_POOL_TIMEOUT (200) #define RXE_POOL_ALIGN (16) static const struct rxe_type_info { const char *name; size_t size; size_t elem_offset; void (*cleanup)(struct rxe_pool_elem *elem); u32 min_index; u32 max_index; u32 max_elem; } rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "uc", .size = sizeof(struct rxe_ucontext), .elem_offset = offsetof(struct rxe_ucontext, elem), .min_index = 1, .max_index = RXE_MAX_UCONTEXT, .max_elem = RXE_MAX_UCONTEXT, }, [RXE_TYPE_PD] = { .name = "pd", .size = sizeof(struct rxe_pd), .elem_offset = offsetof(struct rxe_pd, elem), .min_index = 1, .max_index = RXE_MAX_PD, .max_elem = RXE_MAX_PD, }, [RXE_TYPE_AH] = { .name = "ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, elem), .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, .max_elem = RXE_MAX_AH, }, [RXE_TYPE_SRQ] = { .name = "srq", .size = sizeof(struct rxe_srq), .elem_offset = offsetof(struct rxe_srq, elem), .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, .max_elem = RXE_MAX_SRQ, }, [RXE_TYPE_QP] = { .name = "qp", .size = sizeof(struct rxe_qp), .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, .max_elem = RXE_MAX_QP, }, [RXE_TYPE_CQ] = { .name = "cq", .size = sizeof(struct rxe_cq), .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, .min_index = 1, .max_index = RXE_MAX_CQ, .max_elem = RXE_MAX_CQ, }, [RXE_TYPE_MR] = { .name = "mr", .size = sizeof(struct rxe_mr), .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, .max_elem = RXE_MAX_MR, }, [RXE_TYPE_MW] = { .name = "mw", .size = sizeof(struct rxe_mw), .elem_offset = offsetof(struct rxe_mw, elem), .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, .max_elem = RXE_MAX_MW, }, }; void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, enum rxe_elem_type type) { const struct rxe_type_info *info = &rxe_type_info[type]; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; pool->name = info->name; pool->type = type; pool->max_elem = info->max_elem; pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); pool->elem_offset = info->elem_offset; pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); 
xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); pool->limit.min = info->min_index; pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { WARN_ON(!xa_empty(&pool->xa)); } int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, bool sleepable) { int err = -EINVAL; gfp_t gfp_flags; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto err_cnt; elem->pool = pool; elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); init_completion(&elem->complete); /* AH objects are unique in that the create_ah verb * can be called in atomic context. If the create_ah * call is not sleepable use GFP_ATOMIC. */ gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC; if (sleepable) might_sleep(); err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, &pool->next, gfp_flags); if (err < 0) goto err_cnt; return 0; err_cnt: atomic_dec(&pool->num_elem); return err; } void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { struct rxe_pool_elem *elem; struct xarray *xa = &pool->xa; void *obj; rcu_read_lock(); elem = xa_load(xa, index); if (elem && kref_get_unless_zero(&elem->ref_cnt)) obj = elem->obj; else obj = NULL; rcu_read_unlock(); return obj; } static void rxe_elem_release(struct kref *kref) { struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); complete(&elem->complete); } int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { struct rxe_pool *pool = elem->pool; struct xarray *xa = &pool->xa; int ret, err = 0; void *xa_ret; if (sleepable) might_sleep(); /* erase xarray entry to prevent looking up * the pool elem from its index */ xa_ret = xa_erase(xa, elem->index); WARN_ON(xa_err(xa_ret)); /* if this is the last call to rxe_put complete the * object. It is safe to touch obj->elem after this since * it is freed below */ __rxe_put(elem); /* wait until all references to the object have been * dropped before final object specific cleanup and * return to rdma-core */ if (sleepable) { if (!completion_done(&elem->complete)) { ret = wait_for_completion_timeout(&elem->complete, msecs_to_jiffies(50000)); /* Shouldn't happen. There are still references to * the object but, rather than deadlock, free the * object or pass back to rdma-core. */ if (WARN_ON(!ret)) err = -ETIMEDOUT; } } else { unsigned long until = jiffies + RXE_POOL_TIMEOUT; /* AH objects are unique in that the destroy_ah verb * can be called in atomic context. This delay * replaces the wait_for_completion call above * when the destroy_ah call is not sleepable */ while (!completion_done(&elem->complete) && time_before(jiffies, until)) mdelay(1); if (WARN_ON(!completion_done(&elem->complete))) err = -ETIMEDOUT; } if (pool->cleanup) pool->cleanup(elem); atomic_dec(&pool->num_elem); return err; } int __rxe_get(struct rxe_pool_elem *elem) { return kref_get_unless_zero(&elem->ref_cnt); } int __rxe_put(struct rxe_pool_elem *elem) { return kref_put(&elem->ref_cnt, rxe_elem_release); } void __rxe_finalize(struct rxe_pool_elem *elem) { void *xa_ret; xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL); WARN_ON(xa_err(xa_ret)); }
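The rxe pool above ties object visibility to an xarray index and object lifetime to a kref: rxe_pool_get_index() only returns the object if kref_get_unless_zero() can still elevate the count, and __rxe_cleanup() erases the index first, drops its own reference, then waits for the completion fired by the final put. The sketch below is a hedged userspace model of just the "lookup succeeds only while the refcount is nonzero" rule, using C11 atomics in place of kref; RCU, the xarray, and the completion are omitted.

/*
 * Userspace model of the lookup rule in rxe_pool_get_index(): a lookup
 * succeeds only if the reference count can be raised from a nonzero
 * value, i.e. the object is not already on its way to destruction.
 * Mirrors kref_get_unless_zero()/kref_put() semantics only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct elem {
	atomic_uint refcnt;
	int payload;
};

static bool get_unless_zero(struct elem *e)
{
	unsigned int old = atomic_load(&e->refcnt);

	while (old) {
		if (atomic_compare_exchange_weak(&e->refcnt, &old, old + 1))
			return true;	/* 'old' is reloaded on CAS failure */
	}
	return false;			/* already zero: lookup fails */
}

static bool put(struct elem *e)		/* returns true on the final put */
{
	return atomic_fetch_sub(&e->refcnt, 1) == 1;
}

int main(void)
{
	struct elem e = { .refcnt = 1, .payload = 42 };

	printf("get while live: %d\n", get_unless_zero(&e));	/* 1 */
	put(&e);
	if (put(&e))						/* final put */
		printf("final put: object may be completed and freed\n");
	printf("get after teardown: %d\n", get_unless_zero(&e));	/* 0 */
	return 0;
}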
28 3 1 1 1 1 1 1 2 17 6 7 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org> */ #include <linux/kernel.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_meta.h> #include <net/netfilter/nf_tables_offload.h> #include <linux/tcp.h> #include <linux/udp.h> #include <net/gre.h> #include <net/geneve.h> #include <net/ip.h> #include <linux/icmpv6.h> #include <linux/ip.h> #include <linux/ipv6.h> static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx); /* Same layout as nft_expr but it embeds the private expression data area. 
*/ struct __nft_expr { const struct nft_expr_ops *ops; union { struct nft_payload payload; struct nft_meta meta; } __attribute__((aligned(__alignof__(u64)))); }; enum { NFT_INNER_EXPR_PAYLOAD, NFT_INNER_EXPR_META, }; struct nft_inner { u8 flags; u8 hdrsize; u8 type; u8 expr_type; struct __nft_expr expr; }; static int nft_inner_parse_l2l3(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 off) { __be16 llproto, outer_llproto; u32 nhoff, thoff; if (priv->flags & NFT_INNER_LL) { struct vlan_ethhdr *veth, _veth; struct ethhdr *eth, _eth; u32 hdrsize; eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth); if (!eth) return -1; switch (eth->h_proto) { case htons(ETH_P_IP): case htons(ETH_P_IPV6): llproto = eth->h_proto; hdrsize = sizeof(_eth); break; case htons(ETH_P_8021Q): veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth); if (!veth) return -1; outer_llproto = veth->h_vlan_encapsulated_proto; llproto = veth->h_vlan_proto; hdrsize = sizeof(_veth); break; default: return -1; } ctx->inner_lloff = off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL; off += hdrsize; } else { struct iphdr *iph; u32 _version; iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version); if (!iph) return -1; switch (iph->version) { case 4: llproto = htons(ETH_P_IP); break; case 6: llproto = htons(ETH_P_IPV6); break; default: return -1; } } ctx->llproto = llproto; if (llproto == htons(ETH_P_8021Q)) llproto = outer_llproto; nhoff = off; switch (llproto) { case htons(ETH_P_IP): { struct iphdr *iph, _iph; iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph); if (!iph) return -1; if (iph->ihl < 5 || iph->version != 4) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff + (iph->ihl * 4); if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) { ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = iph->protocol; } } break; case htons(ETH_P_IPV6): { struct ipv6hdr *ip6h, _ip6h; int fh_flags = IP6_FH_F_AUTH; unsigned short fragoff; int l4proto; ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h); if (!ip6h) return -1; if (ip6h->version != 6) return -1; ctx->inner_nhoff = nhoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH; thoff = nhoff; l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags); if (l4proto < 0 || thoff > U16_MAX) return -1; if (fragoff == 0) { thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; } } break; default: return -1; } return 0; } static int nft_inner_parse_tunhdr(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx, u32 *off) { if (pkt->tprot == IPPROTO_GRE) { ctx->inner_tunoff = pkt->thoff; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; return 0; } if (pkt->tprot != IPPROTO_UDP) return -1; ctx->inner_tunoff = *off; ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN; *off += priv->hdrsize; switch (priv->type) { case NFT_INNER_GENEVE: { struct genevehdr *gnvh, _gnvh; gnvh = skb_header_pointer(pkt->skb, pkt->inneroff, sizeof(_gnvh), &_gnvh); if (!gnvh) return -1; *off += gnvh->opt_len * 4; } break; default: break; } return 0; } static int nft_inner_parse(const struct nft_inner *priv, struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { u32 off = pkt->inneroff; if (priv->flags & NFT_INNER_HDRSIZE && nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0) return -1; if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) { if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, 
off) < 0) return -1; } else if (priv->flags & NFT_INNER_TH) { tun_ctx->inner_thoff = off; tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; } tun_ctx->type = priv->type; tun_ctx->cookie = (unsigned long)pkt->skb; pkt->flags |= NFT_PKTINFO_INNER_FULL; return 0; } static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { local_bh_enable(); return false; } *tun_ctx = *this_cpu_tun_ctx; local_bh_enable(); return true; } static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt, const struct nft_inner_tun_ctx *tun_ctx) { struct nft_inner_tun_ctx *this_cpu_tun_ctx; local_bh_disable(); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) *this_cpu_tun_ctx = *tun_ctx; local_bh_enable(); } static bool nft_inner_parse_needed(const struct nft_inner *priv, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *tun_ctx) { if (!(pkt->flags & NFT_PKTINFO_INNER_FULL)) return true; if (!nft_inner_restore_tun_ctx(pkt, tun_ctx)) return true; if (priv->type != tun_ctx->type) return true; return false; } static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_inner *priv = nft_expr_priv(expr); struct nft_inner_tun_ctx tun_ctx = {}; if (nft_payload_inner_offset(pkt) < 0) goto err; if (nft_inner_parse_needed(priv, pkt, &tun_ctx) && nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0) goto err; switch (priv->expr_type) { case NFT_INNER_EXPR_PAYLOAD: nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; case NFT_INNER_EXPR_META: nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx); break; default: WARN_ON_ONCE(1); goto err; } nft_inner_save_tun_ctx(pkt, &tun_ctx); return; err: regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { [NFTA_INNER_NUM] = { .type = NLA_U32 }, [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, [NFTA_INNER_TYPE] = { .type = NLA_U32 }, [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, }; struct nft_expr_info { const struct nft_expr_ops *ops; const struct nlattr *attr; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nft_inner_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_inner *priv = nft_expr_priv(expr); u32 flags, hdrsize, type, num; struct nft_expr_info expr_info; int err; if (!tb[NFTA_INNER_FLAGS] || !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS])); if (flags & ~NFT_INNER_MASK) return -EOPNOTSUPP; num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM])); if (num != 0) return -EOPNOTSUPP; hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE])); type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE])); if (type > U8_MAX) return -EINVAL; if (flags & NFT_INNER_HDRSIZE) { if (hdrsize == 0 || hdrsize > 64) return -EOPNOTSUPP; } priv->flags = flags; priv->hdrsize = hdrsize; priv->type = type; err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info); if (err < 0) return err; priv->expr.ops = expr_info.ops; if (!strcmp(expr_info.ops->type->name, "payload")) priv->expr_type = NFT_INNER_EXPR_PAYLOAD; else if (!strcmp(expr_info.ops->type->name, "meta")) 
priv->expr_type = NFT_INNER_EXPR_META; else return -EINVAL; err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr, (const struct nlattr * const*)expr_info.tb); if (err < 0) return err; return 0; } static int nft_inner_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_inner *priv = nft_expr_priv(expr); if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) || nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) || nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) || nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize))) goto nla_put_failure; if (nft_expr_dump(skb, NFTA_INNER_EXPR, (struct nft_expr *)&priv->expr, reset) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct nft_expr_ops nft_inner_ops = { .type = &nft_inner_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)), .eval = nft_inner_eval, .init = nft_inner_init, .dump = nft_inner_dump, }; struct nft_expr_type nft_inner_type __read_mostly = { .name = "inner", .ops = &nft_inner_ops, .policy = nft_inner_policy, .maxattr = NFTA_INNER_MAX, .owner = THIS_MODULE, };
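/*
 * Illustrative sketch, not part of nft_inner.c above: the inner expression
 * caches the parsed tunnel offsets in a per-cpu nft_inner_tun_ctx and keys
 * that cache on the skb pointer, so a second inner-header match on the same
 * packet can skip re-parsing.  The simplified user-space model below uses
 * hypothetical demo_* names to show the same cookie-check-then-copy pattern
 * as nft_inner_restore_tun_ctx() and nft_inner_save_tun_ctx().
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_tun_ctx {
	unsigned long cookie;		/* which packet the cached data belongs to */
	unsigned int inner_thoff;	/* example of a cached parse result */
};

static struct demo_tun_ctx demo_cache;	/* stands in for the per-cpu slot */

/* Reuse the cached context only if it was filled in for this packet. */
static bool demo_restore(unsigned long pkt_cookie, struct demo_tun_ctx *out)
{
	if (demo_cache.cookie != pkt_cookie)
		return false;		/* stale: belongs to an earlier packet */
	*out = demo_cache;
	return true;
}

/* After a successful parse, publish the result for later expressions. */
static void demo_save(const struct demo_tun_ctx *ctx)
{
	if (demo_cache.cookie != ctx->cookie)
		demo_cache = *ctx;
}

int main(void)
{
	struct demo_tun_ctx parsed = { .cookie = 0x1234, .inner_thoff = 64 };
	struct demo_tun_ctx again;

	demo_save(&parsed);
	printf("hit=%d miss=%d\n", demo_restore(0x1234, &again),
	       !demo_restore(0x5678, &again));
	return 0;
}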
// SPDX-License-Identifier: GPL-2.0 /* * Utility functions for file contents encryption/decryption on * block device-based filesystems. * * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility */ #include <linux/pagemap.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/namei.h> #include "fscrypt_private.h" /** * fscrypt_decrypt_bio() - decrypt the contents of a bio * @bio: the bio to decrypt * * Decrypt the contents of a "read" bio following successful completion of the * underlying disk read. The bio must be reading a whole number of blocks of an * encrypted file directly into the page cache. If the bio is reading the * ciphertext into bounce pages instead of the page cache (for example, because * the file is also compressed, so decompression is required after decryption), * then this function isn't applicable. This function may sleep, so it must be * called from a workqueue rather than from the bio's bi_end_io callback. * * Return: %true on success; %false on failure. On failure, bio->bi_status is * also set to an error status. */ bool fscrypt_decrypt_bio(struct bio *bio) { struct folio_iter fi; bio_for_each_folio_all(fi, bio) { int err = fscrypt_decrypt_pagecache_blocks(fi.folio, fi.length, fi.offset); if (err) { bio->bi_status = errno_to_blk_status(err); return false; } } return true; } EXPORT_SYMBOL(fscrypt_decrypt_bio); static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { const unsigned int blockbits = inode->i_blkbits; const unsigned int blocks_per_page = 1 << (PAGE_SHIFT - blockbits); struct bio *bio; int ret, err = 0; int num_pages = 0; /* This always succeeds since __GFP_DIRECT_RECLAIM is set.
*/ bio = bio_alloc(inode->i_sb->s_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS); while (len) { unsigned int blocks_this_page = min(len, blocks_per_page); unsigned int bytes_this_page = blocks_this_page << blockbits; if (num_pages == 0) { fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS); bio->bi_iter.bi_sector = pblk << (blockbits - SECTOR_SHIFT); } ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0); if (WARN_ON_ONCE(ret != bytes_this_page)) { err = -EIO; goto out; } num_pages++; len -= blocks_this_page; lblk += blocks_this_page; pblk += blocks_this_page; if (num_pages == BIO_MAX_VECS || !len || !fscrypt_mergeable_bio(bio, inode, lblk)) { err = submit_bio_wait(bio); if (err) goto out; bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE); num_pages = 0; } } out: bio_put(bio); return err; } /** * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file * @inode: the file's inode * @lblk: the first file logical block to zero out * @pblk: the first filesystem physical block to zero out * @len: number of blocks to zero out * * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write * ciphertext blocks which decrypt to the all-zeroes block. The blocks must be * both logically and physically contiguous. It's also assumed that the * filesystem only uses a single block device, ->s_bdev. * * Note that since each block uses a different IV, this involves writing a * different ciphertext to each block; we can't simply reuse the same one. * * Return: 0 on success; -errno on failure. */ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { const struct fscrypt_inode_info *ci = inode->i_crypt_info; const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits; const unsigned int du_per_page = 1U << du_per_page_bits; u64 du_index = (u64)lblk << (inode->i_blkbits - du_bits); u64 du_remaining = (u64)len << (inode->i_blkbits - du_bits); sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT); struct page *pages[16]; /* write up to 16 pages at a time */ unsigned int nr_pages; unsigned int i; unsigned int offset; struct bio *bio; int ret, err; if (len == 0) return 0; if (fscrypt_inode_uses_inline_crypto(inode)) return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk, len); BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS); nr_pages = min_t(u64, ARRAY_SIZE(pages), (du_remaining + du_per_page - 1) >> du_per_page_bits); /* * We need at least one page for ciphertext. Allocate the first one * from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail. * * Any additional page allocations are allowed to fail, as they only * help performance, and waiting on the mempool for them could deadlock. */ for (i = 0; i < nr_pages; i++) { pages[i] = fscrypt_alloc_bounce_page(i == 0 ? GFP_NOFS : GFP_NOWAIT | __GFP_NOWARN); if (!pages[i]) break; } nr_pages = i; if (WARN_ON_ONCE(nr_pages <= 0)) return -EINVAL; /* This always succeeds since __GFP_DIRECT_RECLAIM is set. 
*/ bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS); do { bio->bi_iter.bi_sector = sector; i = 0; offset = 0; do { err = fscrypt_crypt_data_unit(ci, FS_ENCRYPT, du_index, ZERO_PAGE(0), pages[i], du_size, offset, GFP_NOFS); if (err) goto out; du_index++; sector += 1U << (du_bits - SECTOR_SHIFT); du_remaining--; offset += du_size; if (offset == PAGE_SIZE || du_remaining == 0) { ret = bio_add_page(bio, pages[i++], offset, 0); if (WARN_ON_ONCE(ret != offset)) { err = -EIO; goto out; } offset = 0; } } while (i != nr_pages && du_remaining != 0); err = submit_bio_wait(bio); if (err) goto out; bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE); } while (du_remaining != 0); err = 0; out: bio_put(bio); for (i = 0; i < nr_pages; i++) fscrypt_free_bounce_page(pages[i]); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range);
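/*
 * Illustrative sketch, not part of fs/crypto/bio.c above: the index
 * arithmetic in fscrypt_zeroout_range() converts filesystem blocks into
 * crypto data units and 512-byte sectors purely with shifts.  The small
 * user-space helper below uses hypothetical demo_* names and assumes a
 * 4 KiB block size and 4 KiB data-unit size to mirror those conversions.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_SECTOR_SHIFT	9	/* 512-byte sectors, as in the kernel */

struct demo_range {
	uint64_t du_index;	/* first data unit to encrypt */
	uint64_t du_count;	/* number of data units to write */
	uint64_t sector;	/* starting device sector */
};

static struct demo_range demo_zeroout_layout(uint64_t lblk, uint64_t pblk,
					     unsigned int len,
					     unsigned int blkbits,
					     unsigned int du_bits)
{
	struct demo_range r;

	r.du_index = lblk << (blkbits - du_bits);
	r.du_count = (uint64_t)len << (blkbits - du_bits);
	r.sector = pblk << (blkbits - DEMO_SECTOR_SHIFT);
	return r;
}

int main(void)
{
	/* 8 blocks starting at logical block 16, physical block 1024. */
	struct demo_range r = demo_zeroout_layout(16, 1024, 8, 12, 12);

	printf("du_index=%llu du_count=%llu sector=%llu\n",
	       (unsigned long long)r.du_index,
	       (unsigned long long)r.du_count,
	       (unsigned long long)r.sector);
	return 0;
}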
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2021, Google LLC. * Pasha Tatashin <pasha.tatashin@soleen.com> */ #include <linux/kstrtox.h> #include <linux/mm.h> #include <linux/page_table_check.h> #include <linux/swap.h> #include <linux/swapops.h> #undef pr_fmt #define pr_fmt(fmt) "page_table_check: " fmt struct page_table_check { atomic_t anon_map_count; atomic_t file_map_count; }; static bool __page_table_check_enabled __initdata = IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED); DEFINE_STATIC_KEY_TRUE(page_table_check_disabled); EXPORT_SYMBOL(page_table_check_disabled); static int __init early_page_table_check_param(char *buf) { return kstrtobool(buf, &__page_table_check_enabled); } early_param("page_table_check", early_page_table_check_param); static bool __init need_page_table_check(void) { return __page_table_check_enabled; } static void __init init_page_table_check(void) { if (!__page_table_check_enabled) return; static_branch_disable(&page_table_check_disabled); } struct page_ext_operations page_table_check_ops = { .size = sizeof(struct page_table_check), .need = need_page_table_check, .init = init_page_table_check, .need_shared_flags = false, }; static struct page_table_check *get_page_table_check(struct page_ext *page_ext) { BUG_ON(!page_ext); return page_ext_data(page_ext, &page_table_check_ops); } /* * An entry is removed from the page table, decrement the counters for that page * verify that it is of correct type and counters do not become negative.
*/ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) { struct page_ext *page_ext; struct page *page; unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); page_ext = page_ext_get(page); if (!page_ext) return; BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { BUG_ON(atomic_read(&ptc->file_map_count)); BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0); } else { BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0); } page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } /* * A new entry is added to the page table, increment the counters for that page * verify that it is of correct type and is not being mapped with a different * type to a different process. */ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, bool rw) { struct page_ext *page_ext; struct page *page; unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); page_ext = page_ext_get(page); if (!page_ext) return; BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { BUG_ON(atomic_read(&ptc->file_map_count)); BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw); } else { BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0); } page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } /* * page is on free list, or is being allocated, verify that counters are zeroes * crash if they are not. */ void __page_table_check_zero(struct page *page, unsigned int order) { struct page_ext *page_ext; unsigned long i; BUG_ON(PageSlab(page)); page_ext = page_ext_get(page); if (!page_ext) return; for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_read(&ptc->file_map_count)); page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (&init_mm == mm) return; if (pte_user_accessible_page(pte)) { page_table_check_clear(pte_pfn(pte), PAGE_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pte_clear); void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (&init_mm == mm) return; if (pmd_user_accessible_page(pmd)) { page_table_check_clear(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pmd_clear); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (&init_mm == mm) return; if (pud_user_accessible_page(pud)) { page_table_check_clear(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pud_clear); /* Whether the swap entry cached writable information */ static inline bool swap_cached_writable(swp_entry_t entry) { return is_writable_device_exclusive_entry(entry) || is_writable_device_private_entry(entry) || is_writable_migration_entry(entry); } static inline void page_table_check_pte_flags(pte_t pte) { if (pte_present(pte) && pte_uffd_wp(pte)) WARN_ON_ONCE(pte_write(pte)); else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte)) WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte))); } void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { unsigned int i; if (&init_mm == mm) return; page_table_check_pte_flags(pte); for (i = 0; i < nr; i++) 
__page_table_check_pte_clear(mm, ptep_get(ptep + i)); if (pte_user_accessible_page(pte)) page_table_check_set(pte_pfn(pte), nr, pte_write(pte)); } EXPORT_SYMBOL(__page_table_check_ptes_set); static inline void page_table_check_pmd_flags(pmd_t pmd) { if (pmd_present(pmd) && pmd_uffd_wp(pmd)) WARN_ON_ONCE(pmd_write(pmd)); else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd)) WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd))); } void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (&init_mm == mm) return; page_table_check_pmd_flags(pmd); __page_table_check_pmd_clear(mm, *pmdp); if (pmd_user_accessible_page(pmd)) { page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT, pmd_write(pmd)); } } EXPORT_SYMBOL(__page_table_check_pmd_set); void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (&init_mm == mm) return; __page_table_check_pud_clear(mm, *pudp); if (pud_user_accessible_page(pud)) { page_table_check_set(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT, pud_write(pud)); } } EXPORT_SYMBOL(__page_table_check_pud_set); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { if (&init_mm == mm) return; if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { pte_t *ptep = pte_offset_map(&pmd, addr); unsigned long i; if (WARN_ON(!ptep)) return; for (i = 0; i < PTRS_PER_PTE; i++) { __page_table_check_pte_clear(mm, ptep_get(ptep)); addr += PAGE_SIZE; ptep++; } pte_unmap(ptep - PTRS_PER_PTE); } }
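/*
 * Illustrative sketch, not part of mm/page_table_check.c above: the checker
 * keeps an anon counter and a file counter per page and crashes if a page is
 * ever mapped as both types at once, or if an anonymous page is writably
 * mapped in more than one place.  The simplified user-space model below uses
 * hypothetical demo_* names to encode the core invariants behind the
 * BUG_ON()s in page_table_check_set() and page_table_check_clear().
 */
#include <assert.h>

struct demo_ptc {
	int anon_map_count;
	int file_map_count;
};

static void demo_map(struct demo_ptc *ptc, int anon, int writable)
{
	if (anon) {
		assert(ptc->file_map_count == 0);	/* never both types */
		ptc->anon_map_count++;
		if (writable)
			assert(ptc->anon_map_count == 1);	/* only one writable anon mapping */
	} else {
		assert(ptc->anon_map_count == 0);	/* never both types */
		ptc->file_map_count++;
	}
}

static void demo_unmap(struct demo_ptc *ptc, int anon)
{
	if (anon)
		assert(--ptc->anon_map_count >= 0);	/* no underflow */
	else
		assert(--ptc->file_map_count >= 0);
}

int main(void)
{
	struct demo_ptc ptc = { 0, 0 };

	demo_map(&ptc, 1, 1);	/* one writable anonymous mapping: OK */
	demo_unmap(&ptc, 1);
	demo_map(&ptc, 0, 0);	/* now mapped as a file page instead */
	demo_unmap(&ptc, 0);
	return 0;
}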
// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/pagemap.h> #include <linux/sched.h> #include <linux/ctype.h> #include <linux/random.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/quotaops.h> #include "f2fs.h" #include "node.h" #include "segment.h" #include "xattr.h" #include "acl.h" #include <trace/events/f2fs.h> static inline bool is_extension_exist(const unsigned char *s, const char *sub, bool tmp_ext, bool tmp_dot) { size_t slen = strlen(s); size_t sublen = strlen(sub); int i; if (sublen == 1 && *sub == '*') return true; /* * filename format of multimedia file should be defined as: * "filename + '.' + extension + (optional: '.' + temp extension)".
*/ if (slen < sublen + 2) return false; if (!tmp_ext) { /* file has no temp extension */ if (s[slen - sublen - 1] != '.') return false; return !strncasecmp(s + slen - sublen, sub, sublen); } for (i = 1; i < slen - sublen; i++) { if (s[i] != '.') continue; if (!strncasecmp(s + i + 1, sub, sublen)) { if (!tmp_dot) return true; if (i == slen - sublen - 1 || s[i + 1 + sublen] == '.') return true; } } return false; } static inline bool is_temperature_extension(const unsigned char *s, const char *sub) { return is_extension_exist(s, sub, true, false); } static inline bool is_compress_extension(const unsigned char *s, const char *sub) { return is_extension_exist(s, sub, true, true); } int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; int cold_count = le32_to_cpu(sbi->raw_super->extension_count); int hot_count = sbi->raw_super->hot_ext_count; int total_count = cold_count + hot_count; int start, count; int i; if (set) { if (total_count == F2FS_MAX_EXTENSION) return -EINVAL; } else { if (!hot && !cold_count) return -EINVAL; if (hot && !hot_count) return -EINVAL; } if (hot) { start = cold_count; count = total_count; } else { start = 0; count = cold_count; } for (i = start; i < count; i++) { if (strcmp(name, extlist[i])) continue; if (set) return -EINVAL; memcpy(extlist[i], extlist[i + 1], F2FS_EXTENSION_LEN * (total_count - i - 1)); memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); if (hot) sbi->raw_super->hot_ext_count = hot_count - 1; else sbi->raw_super->extension_count = cpu_to_le32(cold_count - 1); return 0; } if (!set) return -EINVAL; if (hot) { memcpy(extlist[count], name, strlen(name)); sbi->raw_super->hot_ext_count = hot_count + 1; } else { char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; memcpy(buf, &extlist[cold_count], F2FS_EXTENSION_LEN * hot_count); memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); memcpy(extlist[cold_count], name, strlen(name)); memcpy(&extlist[cold_count + 1], buf, F2FS_EXTENSION_LEN * hot_count); sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); } return 0; } static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir, struct inode *inode, const unsigned char *name) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; unsigned char (*noext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).noextensions; unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions; unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; int i, cold_count, hot_count; if (!f2fs_sb_has_compression(sbi)) return; if (S_ISDIR(inode->i_mode)) goto inherit_comp; /* This name comes only from normal files. */ if (!name) return; /* Don't compress hot files. */ f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) if (is_temperature_extension(name, extlist[i])) break; f2fs_up_read(&sbi->sb_lock); if (i < (cold_count + hot_count)) return; /* Don't compress unallowed extension. */ for (i = 0; i < noext_cnt; i++) if (is_compress_extension(name, noext[i])) return; /* Compress wanting extension. 
*/ for (i = 0; i < ext_cnt; i++) { if (is_compress_extension(name, ext[i])) { set_compress_context(inode); return; } } inherit_comp: /* Inherit the {no-}compression flag in directory */ if (F2FS_I(dir)->i_flags & F2FS_NOCOMP_FL) { F2FS_I(inode)->i_flags |= F2FS_NOCOMP_FL; f2fs_mark_inode_dirty_sync(inode, true); } else if (F2FS_I(dir)->i_flags & F2FS_COMPR_FL) { set_compress_context(inode); } } /* * Set file's temperature for hot/cold data separation */ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; int i, cold_count, hot_count; f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) if (is_temperature_extension(name, extlist[i])) break; f2fs_up_read(&sbi->sb_lock); if (i == cold_count + hot_count) return; if (i < cold_count) file_set_cold(inode); else file_set_hot(inode); } static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, struct inode *dir, umode_t mode, const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct f2fs_inode_info *fi; nid_t ino; struct inode *inode; bool nid_free = false; bool encrypt = false; int xattr_size = 0; int err; inode = new_inode(dir->i_sb); if (!inode) return ERR_PTR(-ENOMEM); if (!f2fs_alloc_nid(sbi, &ino)) { err = -ENOSPC; goto fail; } nid_free = true; inode_init_owner(idmap, inode, dir, mode); fi = F2FS_I(inode); inode->i_ino = ino; inode->i_blocks = 0; simple_inode_init_ts(inode); fi->i_crtime = inode_get_mtime(inode); inode->i_generation = get_random_u32(); if (S_ISDIR(inode->i_mode)) fi->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { err = -EINVAL; goto fail; } if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) fi->i_projid = F2FS_I(dir)->i_projid; else fi->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); if (err) goto fail_drop; err = f2fs_dquot_initialize(inode); if (err) goto fail_drop; set_inode_flag(inode, FI_NEW_INODE); if (encrypt) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); if (f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) xattr_size = F2FS_OPTION(sbi).inline_xattr_size; /* Otherwise, will be 0 */ } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } fi->i_inline_xattr_size = xattr_size; fi->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) fi->i_flags |= F2FS_INDEX_FL; if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); /* Check compression first. 
*/ set_compress_new_inode(sbi, dir, inode, name); /* Should enable inline_data after compression set */ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); if (name && !test_opt(sbi, DISABLE_EXT_IDENTIFY)) set_file_temperature(sbi, inode, name); stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); f2fs_set_inode_flags(inode); f2fs_init_extent_tree(inode); trace_f2fs_new_inode(inode, 0); return inode; fail: trace_f2fs_new_inode(inode, err); make_bad_inode(inode); if (nid_free) set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); fail_drop: trace_f2fs_new_inode(inode, err); dquot_drop(inode); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); if (nid_free) set_inode_flag(inode, FI_FREE_NID); clear_nlink(inode); unlock_new_inode(inode); iput(inode); return ERR_PTR(err); } static int f2fs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; nid_t ino = 0; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; err = f2fs_dquot_initialize(dir); if (err) return err; inode = f2fs_new_inode(idmap, dir, mode, dentry->d_name.name); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) goto out; f2fs_unlock_op(sbi); f2fs_alloc_nid_done(sbi, ino); d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); f2fs_balance_fs(sbi, true); return 0; out: f2fs_handle_failed_inode(inode); return err; } static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) return err; if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(dir); if (err) return err; f2fs_balance_fs(sbi, true); inode_set_ctime_current(inode); ihold(inode); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) goto out; f2fs_unlock_op(sbi); d_instantiate(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(inode, FI_INC_LINK); iput(inode); f2fs_unlock_op(sbi); return err; } struct dentry *f2fs_get_parent(struct dentry *child) { struct page *page; unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page); if (!ino) { if (IS_ERR(page)) return ERR_CAST(page); return ERR_PTR(-ENOENT); } return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; struct dentry *new; nid_t ino = -1; int err = 0; struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); if (dentry->d_name.len > F2FS_NAME_LEN) { err = -ENAMETOOLONG; goto out; } err = f2fs_prepare_lookup(dir, dentry, &fname); if (err == -ENOENT) goto out_splice; if (err) goto out; de = __f2fs_find_entry(dir, &fname, &page); 
f2fs_free_filename(&fname); if (!de) { if (IS_ERR(page)) { err = PTR_ERR(page); goto out; } err = -ENOENT; goto out_splice; } ino = le32_to_cpu(de->ino); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out; } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); err = -EPERM; goto out_iput; } out_splice: if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as * well. For now, prevent the negative dentry * from being cached. */ trace_f2fs_lookup_end(dir, dentry, ino, err); return NULL; } new = d_splice_alias(inode, dentry); trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry, ino, IS_ERR(new) ? PTR_ERR(new) : err); return new; out_iput: iput(inode); out: trace_f2fs_lookup_end(dir, dentry, ino, err); return ERR_PTR(err); } static int f2fs_unlink(struct inode *dir, struct dentry *dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode = d_inode(dentry); struct f2fs_dir_entry *de; struct page *page; int err; trace_f2fs_unlink_enter(dir, dentry); if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto fail; } err = f2fs_dquot_initialize(dir); if (err) goto fail; err = f2fs_dquot_initialize(inode); if (err) goto fail; de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { if (IS_ERR(page)) err = PTR_ERR(page); goto fail; } f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); f2fs_put_page(page, 0); goto fail; } f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. 
*/ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; } static const char *f2fs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { const char *link = page_get_link(dentry, inode, done); if (!IS_ERR(link) && !*link) { /* this is broken symlink case */ do_delayed_call(done); clear_delayed_call(done); link = ERR_PTR(-ENOENT); } return link; } static int f2fs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); struct fscrypt_str disk_link; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); if (err) return err; err = f2fs_dquot_initialize(dir); if (err) return err; inode = f2fs_new_inode(idmap, dir, S_IFLNK | S_IRWXUGO, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); if (IS_ENCRYPTED(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; else inode->i_op = &f2fs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &f2fs_dblock_aops; f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) goto out_f2fs_handle_failed_inode; f2fs_unlock_op(sbi); f2fs_alloc_nid_done(sbi, inode->i_ino); err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) goto err_out; err = page_symlink(inode, disk_link.name, disk_link.len); err_out: d_instantiate_new(dentry, inode); /* * Let's flush symlink data in order to avoid broken symlink as much as * possible. Nevertheless, fsyncing is the best way, but there is no * way to get a file descriptor in order to flush that. * * Note that, it needs to do dir->fsync to make this recoverable. * If the symlink path is stored into inline_data, there is no * performance regression. 
*/ if (!err) { filemap_write_and_wait_range(inode->i_mapping, 0, disk_link.len - 1); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); } else { f2fs_unlink(dir, dentry); } f2fs_balance_fs(sbi, true); goto out_free_encrypted_link; out_f2fs_handle_failed_inode: f2fs_handle_failed_inode(inode); out_free_encrypted_link: if (disk_link.name != (unsigned char *)symname) kfree(disk_link.name); return err; } static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; err = f2fs_dquot_initialize(dir); if (err) return err; inode = f2fs_new_inode(idmap, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) goto out_fail; f2fs_unlock_op(sbi); f2fs_alloc_nid_done(sbi, inode->i_ino); d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); f2fs_balance_fs(sbi, true); return 0; out_fail: clear_inode_flag(inode, FI_INC_LINK); f2fs_handle_failed_inode(inode); return err; } static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); if (f2fs_empty_dir(inode)) return f2fs_unlink(dir, dentry); return -ENOTEMPTY; } static int f2fs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err = 0; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; err = f2fs_dquot_initialize(dir); if (err) return err; inode = f2fs_new_inode(idmap, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) goto out; f2fs_unlock_op(sbi); f2fs_alloc_nid_done(sbi, inode->i_ino); d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); f2fs_balance_fs(sbi, true); return 0; out: f2fs_handle_failed_inode(inode); return err; } static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode, bool is_whiteout, struct inode **new_inode, struct f2fs_filename *fname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; err = f2fs_dquot_initialize(dir); if (err) return err; inode = f2fs_new_inode(idmap, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); if (is_whiteout) { init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); inode->i_op = &f2fs_special_inode_operations; } else { inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; } f2fs_lock_op(sbi); err = f2fs_acquire_orphan_inode(sbi); if (err) goto out; err = f2fs_do_tmpfile(inode, dir, fname); if (err) goto release_out; /* * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. 
*/ f2fs_add_orphan_inode(inode); f2fs_alloc_nid_done(sbi, inode->i_ino); if (is_whiteout) { f2fs_i_links_write(inode, false); spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } else { if (file) d_tmpfile(file, inode); else f2fs_i_links_write(inode, false); } /* link_count was changed by d_tmpfile as well. */ f2fs_unlock_op(sbi); unlock_new_inode(inode); if (new_inode) *new_inode = inode; f2fs_balance_fs(sbi, true); return 0; release_out: f2fs_release_orphan_inode(sbi); out: f2fs_handle_failed_inode(inode); return err; } static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL); return finish_open_simple(file, err); } static int f2fs_create_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct inode **whiteout, struct f2fs_filename *fname) { return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE, true, whiteout, fname); } int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode) { return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode, NULL); } static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; struct page *old_dir_page = NULL; struct page *old_page, *new_page = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool old_is_dir = S_ISDIR(old_inode->i_mode); int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; /* * If new_inode is null, the below renaming flow will * add a link in old_dir which can convert inline_dir. * After then, if we failed to get the entry due to other * reasons like ENOMEM, we had to remove the new entry. * Instead of adding such the error handling routine, let's * simply convert first here. 
*/ if (old_dir == new_dir && !new_inode) { err = f2fs_try_convert_inline_dir(old_dir, new_dentry); if (err) return err; } if (flags & RENAME_WHITEOUT) { struct f2fs_filename fname; err = f2fs_setup_filename(old_dir, &old_dentry->d_name, 0, &fname); if (err) return err; err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); if (err) return err; } err = f2fs_dquot_initialize(old_dir); if (err) goto out; err = f2fs_dquot_initialize(new_dir); if (err) goto out; if (new_inode) { err = f2fs_dquot_initialize(new_inode); if (err) goto out; } err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); goto out; } if (old_is_dir && old_dir != new_dir) { old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); if (!old_dir_entry) { if (IS_ERR(old_dir_page)) err = PTR_ERR(old_dir_page); goto out_old; } } if (new_inode) { err = -ENOTEMPTY; if (old_is_dir && !f2fs_empty_dir(new_inode)) goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); goto out_dir; } f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_acquire_orphan_inode(sbi); if (err) goto put_out_dir; f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_page = NULL; inode_set_ctime_current(new_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (old_is_dir) f2fs_i_links_write(new_inode, false); f2fs_i_links_write(new_inode, false); f2fs_up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) f2fs_add_orphan_inode(new_inode); else f2fs_release_orphan_inode(sbi); } else { f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); goto out_dir; } if (old_is_dir) f2fs_i_links_write(new_dir, true); } f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_is_dir || whiteout) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); f2fs_up_write(&F2FS_I(old_inode)->i_sem); inode_set_ctime_current(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); old_page = NULL; if (whiteout) { set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) goto put_out_dir; spin_lock(&whiteout->i_lock); whiteout->i_state &= ~I_LINKABLE; spin_unlock(&whiteout->i_lock); iput(whiteout); } if (old_dir_entry) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); if (old_is_dir) f2fs_i_links_write(old_dir, false); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); if (S_ISDIR(old_inode->i_mode)) f2fs_add_ino_entry(sbi, old_inode->i_ino, TRANS_DIR_INO); } f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: f2fs_unlock_op(sbi); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); out: iput(whiteout); return err; } static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct page *old_dir_page, *new_dir_page; struct page *old_page, *new_page; struct f2fs_dir_entry *old_dir_entry = 
NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid)) || (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(old_dir)->i_projid, F2FS_I(new_dentry->d_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(old_dir); if (err) goto out; err = f2fs_dquot_initialize(new_dir); if (err) goto out; err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); goto out; } new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); goto out_old; } /* prepare for updating ".." directory entry info later */ if (old_dir != new_dir) { if (S_ISDIR(old_inode->i_mode)) { old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); if (!old_dir_entry) { if (IS_ERR(old_dir_page)) err = PTR_ERR(old_dir_page); goto out_new; } } if (S_ISDIR(new_inode->i_mode)) { new_dir_entry = f2fs_parent_dir(new_inode, &new_dir_page); if (!new_dir_entry) { if (IS_ERR(new_dir_page)) err = PTR_ERR(new_dir_page); goto out_old_dir; } } } /* * If cross rename between file and directory those are not * in the same directory, we will inc nlink of file's parent * later, so we should check upper boundary of its nlink. */ if ((!old_dir_entry || !new_dir_entry) && old_dir_entry != new_dir_entry) { old_nlink = old_dir_entry ? -1 : 1; new_nlink = -old_nlink; err = -EMLINK; if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) || (new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX)) goto out_new_dir; } f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); /* update ".." directory entry info of old dentry */ if (old_dir_entry) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); /* update ".." 
directory entry info of new dentry */ if (new_dir_entry) f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); /* update directory entry info of old dir inode */ f2fs_set_link(old_dir, old_entry, old_page, new_inode); f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); f2fs_up_write(&F2FS_I(old_inode)->i_sem); inode_set_ctime_current(old_dir); if (old_nlink) { f2fs_down_write(&F2FS_I(old_dir)->i_sem); f2fs_i_links_write(old_dir, old_nlink > 0); f2fs_up_write(&F2FS_I(old_dir)->i_sem); } f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) file_lost_pino(new_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(new_inode, old_dir->i_ino); f2fs_up_write(&F2FS_I(new_inode)->i_sem); inode_set_ctime_current(new_dir); if (new_nlink) { f2fs_down_write(&F2FS_I(new_dir)->i_sem); f2fs_i_links_write(new_dir, new_nlink > 0); f2fs_up_write(&F2FS_I(new_dir)->i_sem); } f2fs_mark_inode_dirty_sync(new_dir, false); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { f2fs_add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); } f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { f2fs_put_page(old_dir_page, 0); } out_new: f2fs_put_page(new_page, 0); out_old: f2fs_put_page(old_page, 0); out: return err; } static int f2fs_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int err; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; trace_f2fs_rename_start(old_dir, old_dentry, new_dir, new_dentry, flags); err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (err) return err; if (flags & RENAME_EXCHANGE) err = f2fs_cross_rename(old_dir, old_dentry, new_dir, new_dentry); else /* * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. 
*/ err = f2fs_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); trace_f2fs_rename_end(old_dentry, new_dentry, flags, err); return err; } static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct page *page; const char *target; if (!dentry) return ERR_PTR(-ECHILD); page = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(page)) return ERR_CAST(page); target = fscrypt_get_symlink(inode, page_address(page), inode->i_sb->s_blocksize, done); put_page(page); return target; } static int f2fs_encrypted_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { f2fs_getattr(idmap, path, stat, request_mask, query_flags); return fscrypt_symlink_getattr(path, stat); } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .get_link = f2fs_encrypted_get_link, .getattr = f2fs_encrypted_symlink_getattr, .setattr = f2fs_setattr, .listxattr = f2fs_listxattr, }; const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, .lookup = f2fs_lookup, .link = f2fs_link, .unlink = f2fs_unlink, .symlink = f2fs_symlink, .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename2, .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_inode_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, .fiemap = f2fs_fiemap, .fileattr_get = f2fs_fileattr_get, .fileattr_set = f2fs_fileattr_set, }; const struct inode_operations f2fs_symlink_inode_operations = { .get_link = f2fs_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .listxattr = f2fs_listxattr, }; const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_inode_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, };
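/*
 * Illustrative sketch, not part of fs/f2fs/namei.c above: is_extension_exist()
 * treats a file name as "name + '.' + extension (+ optional '.' + temp
 * extension)" and, for the hot/cold and compression lists, also accepts a
 * bare "*" wildcard.  The simplified user-space helper below uses a
 * hypothetical demo_* name and covers only the non-temporary-extension case.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

static bool demo_has_extension(const char *name, const char *ext)
{
	size_t nlen = strlen(name);
	size_t elen = strlen(ext);

	if (elen == 1 && ext[0] == '*')
		return true;			/* wildcard matches everything */
	if (nlen < elen + 2)
		return false;			/* too short for "x.<ext>" */
	if (name[nlen - elen - 1] != '.')
		return false;			/* extension must follow a dot */
	return strncasecmp(name + nlen - elen, ext, elen) == 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       demo_has_extension("movie.mp4", "mp4"),	/* 1 */
	       demo_has_extension("movie.mp4", "avi"),	/* 0 */
	       demo_has_extension("movie.mp4", "*"));	/* 1 */
	return 0;
}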
// SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * sysctl_net_core.c: sysctl interface to net core subsystem. * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/core directory entry (empty =) ).
[MS] */ #include <linux/filter.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/sched/isolation.h> #include <net/ip.h> #include <net/sock.h> #include <net/net_ratelimit.h> #include <net/busy_poll.h> #include <net/pkt_sched.h> #include <net/hotdata.h> #include <net/proto_memory.h> #include <net/rps.h> #include "dev.h" static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; static int net_msg_warn; /* Unused, but still a sysctl */ int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); /* 0 - Keep current behavior: * IPv4: inherit all current settings from init_net * IPv6: reset all settings to default * 1 - Both inherit all current settings from init_net * 2 - Both reset all settings to default * 3 - Both inherit all settings from current netns */ int sysctl_devconf_inherit_init_net __read_mostly; EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, struct cpumask *mask) { char *kbuf; int len; if (*ppos || !*lenp) { *lenp = 0; return 0; } /* CPUs are displayed as a hex bitmap + a comma between each groups of 8 * nibbles (except the last one which has a newline instead). * Guesstimate the buffer size at the group granularity level. */ len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp); kbuf = kmalloc(len, GFP_KERNEL); if (!kbuf) { *lenp = 0; return -ENOMEM; } len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); if (!len) { *lenp = 0; goto free_buf; } /* scnprintf writes a trailing null char not counted in the returned * length, override it with a newline. */ kbuf[len++] = '\n'; memcpy(buffer, kbuf, len); *lenp = len; *ppos += len; free_buf: kfree(kbuf); return 0; } #endif #ifdef CONFIG_RPS static struct cpumask *rps_default_mask_cow_alloc(struct net *net) { struct cpumask *rps_default_mask; if (net->core.rps_default_mask) return net->core.rps_default_mask; rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); if (!rps_default_mask) return NULL; /* pairs with READ_ONCE in rx_queue_default_mask() */ WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); return rps_default_mask; } static int rps_default_mask_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->data; int err = 0; rtnl_lock(); if (write) { struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); err = -ENOMEM; if (!rps_default_mask) goto done; err = cpumask_parse(buffer, rps_default_mask); if (err) goto done; err = rps_cpumask_housekeeping(rps_default_mask); if (err) goto done; } else { err = dump_cpumask(buffer, lenp, ppos, net->core.rps_default_mask ? 
static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu_mightsleep(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);

		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu_mightsleep(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif
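/*
 * Illustrative usage (not part of the original source), based on the
 * handlers above: rps_sock_flow_sysctl() rounds the requested size up to a
 * power of two and rejects values above 1<<29, so writing 30000 to
 * net.core.rps_sock_flow_entries would yield a 32768-entry table.
 * flow_limit_table_len_sysctl() only accepts powers of two, and
 * set_default_qdisc() takes a qdisc name, e.g.:
 *
 *	echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 *	echo fq_codel > /proc/sys/net/core/default_qdisc
 *
 * The second write fails with an error unless the named qdisc is available
 * to qdisc_set_default().
 */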
static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table,
					   int write, void *buffer,
					   size_t *lenp, loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
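/*
 * Illustrative usage (not part of the original source): writes to the BPF
 * JIT knobs below require CAP_SYS_ADMIN.  Without CONFIG_BPF_JIT_ALWAYS_ON,
 * bpf_jit_enable accepts 0 (off), 1 (on) and 2 (on + debug dump), e.g.:
 *
 *	echo 1 > /proc/sys/net/core/bpf_jit_enable
 *	echo 2 > /proc/sys/net/core/bpf_jit_enable
 *
 * Value 2 is only accepted when bpf_dump_raw_ok() allows it and triggers
 * the pr_warn() above; with CONFIG_BPF_JIT_ALWAYS_ON the value is pinned
 * to 1 (extra1 == extra2 == SYSCTL_ONE in the table below).
 */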
static struct ctl_table net_core_table[] = {
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &net_hotdata.sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{